blob: 68643c50980f966a6d3358e47bf6311600ed4836 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Chia-I Wu7f0f7c12022-01-12 10:42:18 -08006use std::collections::{BTreeMap, HashSet};
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00007use std::convert::{TryFrom, TryInto};
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090011use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -070015use std::os::unix::net::UnixListener;
Christian Blichmann50f95912021-11-05 16:59:39 +010016use std::os::unix::{io::FromRawFd, net::UnixStream, prelude::OpenOptionsExt};
Zach Reizner39aa26b2017-12-12 18:03:23 -080017use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070018use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070019use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090020use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070021
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::thread;
23use std::thread::JoinHandle;
24
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -080025use libc::{self, c_int, c_ulong, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080026
Tomasz Jeznach42644642020-05-20 23:27:59 -070027use acpi_tables::sdt::SDT;
28
Daniel Verkamp6b298582021-08-16 15:37:11 -070029use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090030use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080031use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090032use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070033use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090034#[cfg(feature = "audio_cras")]
35use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -070036use devices::virtio::vhost::user::proxy::VirtioVhostUser;
Woody Chow1b16db12021-04-02 16:59:59 +090037#[cfg(feature = "audio")]
38use devices::virtio::vhost::user::vmm::Snd as VhostUserSnd;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090039use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070040 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
Chirantan Ekbote84091e52021-09-10 18:43:17 +090041 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Vsock as VhostUserVsock,
42 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090043};
Alexandre Courbotb42b3e52021-07-09 23:38:57 +090044#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
45use devices::virtio::VideoBackendType;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070046use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090047#[cfg(feature = "gpu")]
48use devices::virtio::{
Chia-I Wu16fb6592021-11-10 11:45:32 -080049 gpu::{GpuRenderServerParameters, DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
Chirantan Ekbote44292f52021-06-25 18:31:41 +090050 vhost::user::vmm::Gpu as VhostUserGpu,
51 EventDevice,
52};
paulhsiace17e6e2020-08-28 18:37:45 +080053#[cfg(feature = "audio")]
54use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080055use devices::{
Xiong Zhangf82f2dc2021-05-21 16:54:12 +080056 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqChip, IrqEventIndex, KvmKernelIrqChip,
57 PciAddress, PciBridge, PciDevice, PcieRootPort, StubPciDevice, VcpuRunState, VfioContainer,
58 VfioDevice, VfioPciDevice, VfioPlatformDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080059};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080060use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070061#[cfg(feature = "usb")]
62use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070063use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Andrew Walbran00f1c9f2021-12-10 17:13:08 +000064use hypervisor::{HypervisorCap, ProtectionType, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070065use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070066use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080067use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080068use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070069use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080070use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090071use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080072
Keiichi Watanabec5262e92020-10-21 15:57:33 +090073#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
74use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090075use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000076 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
Christian Blichmann50f95912021-11-05 16:59:39 +010077 VhostUserFsOption, VhostUserOption, VhostUserWlOption, VhostVsockDeviceParameter,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090078};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070079use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090080 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070081};
Sonny Raoed517d12018-02-13 22:09:43 -080082
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080083#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070084use {
85 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070086 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070087 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
88};
Zach Reizner55a9e502018-10-03 10:22:32 -070089#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070090use {
Steven Richman11dc6712020-09-02 15:39:14 -070091 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
92 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070093 x86_64::X8664arch as Arch,
94};
Zach Reizner39aa26b2017-12-12 18:03:23 -080095
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080096enum TaggedControlTube {
97 Fs(Tube),
98 Vm(Tube),
99 VmMemory(Tube),
100 VmIrq(Tube),
101 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700102}
103
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800104impl AsRef<Tube> for TaggedControlTube {
105 fn as_ref(&self) -> &Tube {
106 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700107 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800108 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700109 }
110 }
111}
112
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800113impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700114 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800115 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700116 }
117}
118
Matt Delcoc24ad782020-02-14 13:24:36 -0800119struct SandboxConfig<'a> {
120 limit_caps: bool,
121 log_failures: bool,
122 seccomp_policy: &'a Path,
123 uid_map: Option<&'a str>,
124 gid_map: Option<&'a str>,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -0800125 remount_mode: Option<c_ulong>,
Matt Delcoc24ad782020-02-14 13:24:36 -0800126}
127
Zach Reizner44863792019-06-26 14:22:08 -0700128fn create_base_minijail(
129 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800130 r_limit: Option<u64>,
131 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700132) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800133 // All child jails run in a new user namespace without any users mapped,
134 // they run as nobody unless otherwise configured.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700135 let mut j = Minijail::new().context("failed to jail device")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800136
137 if let Some(config) = config {
138 j.namespace_pids();
139 j.namespace_user();
140 j.namespace_user_disable_setgroups();
141 if config.limit_caps {
142 // Don't need any capabilities.
143 j.use_caps(0);
144 }
145 if let Some(uid_map) = config.uid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700146 j.uidmap(uid_map).context("error setting UID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800147 }
148 if let Some(gid_map) = config.gid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700149 j.gidmap(gid_map).context("error setting GID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800150 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900151 // Run in a new mount namespace.
152 j.namespace_vfs();
153
Matt Delcoc24ad782020-02-14 13:24:36 -0800154 // Run in an empty network namespace.
155 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900156
157 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800158 j.no_new_privs();
159
160 // By default we'll prioritize using the pre-compiled .bpf over the .policy
161 // file (the .bpf is expected to be compiled using "trap" as the failure
162 // behavior instead of the default "kill" behavior).
163 // Refer to the code comment for the "seccomp-log-failures"
164 // command-line parameter for an explanation about why the |log_failures|
165 // flag forces the use of .policy files (and the build-time alternative to
166 // this run-time flag).
167 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
168 if bpf_policy_file.exists() && !config.log_failures {
169 j.parse_seccomp_program(&bpf_policy_file)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700170 .context("failed to parse precompiled seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800171 } else {
172 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
173 // which will correctly kill the entire device process if a worker
174 // thread commits a seccomp violation.
175 j.set_seccomp_filter_tsync();
176 if config.log_failures {
177 j.log_seccomp_filter_failures();
178 }
179 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700180 .context("failed to parse seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800181 }
182 j.use_seccomp_filter();
183 // Don't do init setup.
184 j.run_as_init();
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -0800185 // Set up requested remount mode instead of default MS_PRIVATE.
186 if let Some(mode) = config.remount_mode {
187 j.set_remount_mode(mode);
188 }
Matt Delcoc24ad782020-02-14 13:24:36 -0800189 }
190
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900191 // Only pivot_root if we are not re-using the current root directory.
192 if root != Path::new("/") {
193 // It's safe to call `namespace_vfs` multiple times.
194 j.namespace_vfs();
Daniel Verkamp6b298582021-08-16 15:37:11 -0700195 j.enter_pivot_root(root)
196 .context("failed to pivot root device")?;
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900197 }
Matt Delco45caf912019-11-13 08:11:09 -0800198
Matt Delcoc24ad782020-02-14 13:24:36 -0800199 // Most devices don't need to open many fds.
200 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
201 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700202 .context("error setting max open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800203
Zach Reizner39aa26b2017-12-12 18:03:23 -0800204 Ok(j)
205}
206
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800207fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700208 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800209 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
210 // A directory for a jailed device's pivot root.
211 let root_path = Path::new(pivot_root);
212 if !root_path.exists() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700213 bail!("{} doesn't exist, can't jail devices", pivot_root);
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800214 }
215 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800216 let config = SandboxConfig {
217 limit_caps: true,
218 log_failures: cfg.seccomp_log_failures,
219 seccomp_policy: &policy_path,
220 uid_map: None,
221 gid_map: None,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -0800222 remount_mode: None,
Matt Delcoc24ad782020-02-14 13:24:36 -0800223 };
224 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800225 } else {
226 Ok(None)
227 }
228}
229
Daniel Verkamp6b298582021-08-16 15:37:11 -0700230type DeviceResult<T = VirtioDeviceStub> = Result<T>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800231
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000232fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900233 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700234 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800235 // Lock the disk image to prevent other crosvm instances from using it.
236 let lock_op = if disk.read_only {
237 FlockOperation::LockShared
238 } else {
239 FlockOperation::LockExclusive
240 };
Daniel Verkamp6b298582021-08-16 15:37:11 -0700241 flock(&raw_image, lock_op, true).context("failed to lock disk image")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800242
Junichi Uekawa52437db2021-09-29 17:33:07 +0900243 info!("Trying to attach block device: {}", disk.path.display());
Daniel Verkamp6b298582021-08-16 15:37:11 -0700244 let dev = if disk::async_ok(&raw_image).context("failed to check disk async_ok")? {
245 let async_file = disk::create_async_disk_file(raw_image)
246 .context("failed to create async virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700247 Box::new(
248 virtio::BlockAsync::new(
249 virtio::base_features(cfg.protected_vm),
250 async_file,
251 disk.read_only,
252 disk.sparse,
253 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700254 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800255 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700256 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700257 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700258 ) as Box<dyn VirtioDevice>
259 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700260 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700261 .context("failed to create virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700262 Box::new(
263 virtio::Block::new(
264 virtio::base_features(cfg.protected_vm),
265 disk_file,
266 disk.read_only,
267 disk.sparse,
268 disk.block_size,
269 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800270 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700271 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700272 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700273 ) as Box<dyn VirtioDevice>
274 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800275
276 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700277 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700278 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800279 })
280}
281
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900282fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
283 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700284 .context("failed to set up vhost-user block device")?;
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900285
286 Ok(VirtioDeviceStub {
287 dev: Box::new(dev),
288 // no sandbox here because virtqueue handling is exported to a different process.
289 jail: None,
290 })
291}
292
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900293fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
294 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700295 .context("failed to set up vhost-user console device")?;
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900296
297 Ok(VirtioDeviceStub {
298 dev: Box::new(dev),
299 // no sandbox here because virtqueue handling is exported to a different process.
300 jail: None,
301 })
302}
303
Woody Chow5890b702021-02-12 14:57:02 +0900304fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
305 let dev = VhostUserFs::new(
306 virtio::base_features(cfg.protected_vm),
307 &option.socket,
308 &option.tag,
309 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700310 .context("failed to set up vhost-user fs device")?;
Woody Chow5890b702021-02-12 14:57:02 +0900311
312 Ok(VirtioDeviceStub {
313 dev: Box::new(dev),
314 // no sandbox here because virtqueue handling is exported to a different process.
315 jail: None,
316 })
317}
318
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900319fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
320 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700321 .context("failed to set up vhost-user mac80211_hwsim device")?;
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900322
323 Ok(VirtioDeviceStub {
324 dev: Box::new(dev),
325 // no sandbox here because virtqueue handling is exported to a different process.
326 jail: None,
327 })
328}
329
/// Creates a vhost-user sound device front-end connected to `option.socket`.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(cfg: &Config, option: &VhostUserOption) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let snd = VhostUserSnd::new(features, &option.socket)
        .context("failed to set up vhost-user snd device")?;

    // Not jailed: the virtqueues are handled by a different process.
    let dev: Box<dyn VirtioDevice> = Box::new(snd);
    Ok(VirtioDeviceStub { dev, jail: None })
}
341
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700342fn create_vvu_proxy_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
343 let listener = UnixListener::bind(&opt.socket).map_err(|e| {
344 error!("failed to bind listener for vvu proxy device: {}", e);
345 e
346 })?;
347
348 let dev = VirtioVhostUser::new(virtio::base_features(cfg.protected_vm), listener)
349 .context("failed to create VVU proxy device")?;
350
351 Ok(VirtioDeviceStub {
352 dev: Box::new(dev),
353 jail: simple_jail(cfg, "vvu_proxy_device")?,
354 })
355}
356
David Tolnay2b089fc2019-03-04 15:33:22 -0800357fn create_rng_device(cfg: &Config) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700358 let dev = virtio::Rng::new(virtio::base_features(cfg.protected_vm))
359 .context("failed to set up rng")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800360
361 Ok(VirtioDeviceStub {
362 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700363 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800364 })
365}
366
/// Creates the CRAS-backed virtio sound device from `cras_snd`.
///
/// When sandboxing is enabled the jail gets a small tmpfs root, a writable
/// bind mount of `/run/cras`, and the current user added to it.
#[cfg(feature = "audio_cras")]
fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
    let dev = virtio::snd::cras_backend::VirtioSndCras::new(
        virtio::base_features(cfg.protected_vm),
        cras_snd,
    )
    .context("failed to create cras sound device")?;

    // `cfg` is already a reference; pass it straight through like every other
    // `simple_jail` caller in this file.
    let mut jail = simple_jail(cfg, "cras_snd_device")?;
    if let Some(jail) = &mut jail {
        // Create a tmpfs in the device's root directory for cras_snd_device.
        // The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let run_cras_path = Path::new("/run/cras");
        jail.mount_bind(run_cras_path, run_cras_path, true)?;

        add_current_user_to_jail(jail)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
402
/// Creates the virtio TPM device and its storage directory.
///
/// With sandboxing enabled, storage lives in `/run/vm/tpm.<pid>`, which is
/// created, chowned to the ids returned by `add_current_user_to_jail`, and
/// bind-mounted into the jail. Without a jail, `/tmp/tpm-simulator` is used.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match &mut tpm_jail {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )?;

            let crosvm_ids = add_current_user_to_jail(jail)?;

            // Per-process storage directory, keyed by this process's pid.
            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage).with_context(|| {
                format!("failed to create tpm storage dir {}", storage.display())
            })?;
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .context("failed to chown tpm storage")?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
451
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700452fn create_single_touch_device(
453 cfg: &Config,
454 single_touch_spec: &TouchDeviceOption,
455 idx: u32,
456) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800457 let socket = single_touch_spec
458 .get_path()
459 .into_unix_stream()
460 .map_err(|e| {
461 error!("failed configuring virtio single touch: {:?}", e);
462 e
463 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800464
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800465 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700466 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700467 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700468 socket,
469 width,
470 height,
471 virtio::base_features(cfg.protected_vm),
472 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700473 .context("failed to set up input device")?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800474 Ok(VirtioDeviceStub {
475 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700476 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800477 })
478}
479
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700480fn create_multi_touch_device(
481 cfg: &Config,
482 multi_touch_spec: &TouchDeviceOption,
483 idx: u32,
484) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000485 let socket = multi_touch_spec
486 .get_path()
487 .into_unix_stream()
488 .map_err(|e| {
489 error!("failed configuring virtio multi touch: {:?}", e);
490 e
491 })?;
492
493 let (width, height) = multi_touch_spec.get_size();
494 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700495 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000496 socket,
497 width,
498 height,
499 virtio::base_features(cfg.protected_vm),
500 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700501 .context("failed to set up input device")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000502
503 Ok(VirtioDeviceStub {
504 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700505 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000506 })
507}
508
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700509fn create_trackpad_device(
510 cfg: &Config,
511 trackpad_spec: &TouchDeviceOption,
512 idx: u32,
513) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800514 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000515 error!("failed configuring virtio trackpad: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800516 e
517 })?;
518
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800519 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700520 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700521 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700522 socket,
523 width,
524 height,
525 virtio::base_features(cfg.protected_vm),
526 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700527 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800528
529 Ok(VirtioDeviceStub {
530 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700531 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800532 })
533}
534
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700535fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800536 let socket = mouse_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000537 error!("failed configuring virtio mouse: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800538 e
539 })?;
540
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700541 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700542 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800543
544 Ok(VirtioDeviceStub {
545 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700546 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800547 })
548}
549
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700550fn create_keyboard_device<T: IntoUnixStream>(
551 cfg: &Config,
552 keyboard_socket: T,
553 idx: u32,
554) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800555 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000556 error!("failed configuring virtio keyboard: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800557 e
558 })?;
559
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700560 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700561 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800562
563 Ok(VirtioDeviceStub {
564 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700565 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800566 })
567}
568
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700569fn create_switches_device<T: IntoUnixStream>(
570 cfg: &Config,
571 switches_socket: T,
572 idx: u32,
573) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800574 let socket = switches_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000575 error!("failed configuring virtio switches: {:#}", e);
Daniel Norman5e23df72021-03-11 10:11:02 -0800576 e
577 })?;
578
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700579 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700580 .context("failed to set up input device")?;
Daniel Norman5e23df72021-03-11 10:11:02 -0800581
582 Ok(VirtioDeviceStub {
583 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700584 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800585 })
586}
587
David Tolnay2b089fc2019-03-04 15:33:22 -0800588fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
589 let dev_file = OpenOptions::new()
590 .read(true)
591 .write(true)
592 .open(dev_path)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700593 .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800594
Noah Goldd4ca29b2020-10-27 12:21:52 -0700595 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700596 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800597
598 Ok(VirtioDeviceStub {
599 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700600 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800601 })
602}
603
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800604fn create_balloon_device(cfg: &Config, tube: Tube, inflate_tube: Option<Tube>) -> DeviceResult {
605 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube, inflate_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700606 .context("failed to create balloon")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800607
608 Ok(VirtioDeviceStub {
609 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700610 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800611 })
612}
613
Alexandre Courbot911773a2021-12-10 14:31:10 +0900614/// Generic method for creating a network device. `create_device` is a closure that takes the virtio
615/// features and number of queue pairs as parameters, and is responsible for creating the device
616/// itself.
617fn create_net_device<F, T>(cfg: &Config, policy: &str, create_device: F) -> DeviceResult
618where
619 F: Fn(u64, u16) -> Result<T>,
620 T: VirtioDevice + 'static,
621{
Xiong Zhang773c7072020-03-20 10:39:55 +0800622 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
623 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700624 if vcpu_count < vq_pairs as usize {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900625 warn!("the number of net vq pairs must not exceed the vcpu count, falling back to single queue mode");
Xiong Zhang773c7072020-03-20 10:39:55 +0800626 vq_pairs = 1;
627 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100628 let features = virtio::base_features(cfg.protected_vm);
Alexandre Courbot911773a2021-12-10 14:31:10 +0900629
630 let dev = create_device(features, vq_pairs)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800631
632 Ok(VirtioDeviceStub {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900633 dev: Box::new(dev) as Box<dyn VirtioDevice>,
634 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800635 })
636}
637
Alexandre Courbot911773a2021-12-10 14:31:10 +0900638/// Returns a network device created from a new TAP interface configured with `host_ip`, `netmask`,
639/// and `mac_address`.
640fn create_net_device_from_config(
David Tolnay2b089fc2019-03-04 15:33:22 -0800641 cfg: &Config,
642 host_ip: Ipv4Addr,
643 netmask: Ipv4Addr,
644 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800645) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800646 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800647 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800648 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800649 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800650 };
651
Alexandre Courbot911773a2021-12-10 14:31:10 +0900652 if cfg.vhost_net {
653 create_net_device(cfg, policy, |features, _vq_pairs| {
654 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
655 &cfg.vhost_net_device_path,
656 features,
657 host_ip,
658 netmask,
659 mac_address,
660 )
661 .context("failed to set up vhost networking")
662 })
663 } else {
664 create_net_device(cfg, policy, |features, vq_pairs| {
665 virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
666 .context("failed to create virtio network device")
667 })
668 }
669}
670
671/// Returns a network device from a file descriptor to a configured TAP interface.
672fn create_tap_net_device_from_fd(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
673 create_net_device(cfg, "net_device", |features, vq_pairs| {
674 // Safe because we ensure that we get a unique handle to the fd.
675 let tap = unsafe {
676 Tap::from_raw_descriptor(
677 validate_raw_descriptor(tap_fd).context("failed to validate tap descriptor")?,
678 )
679 .context("failed to create tap device")?
680 };
681
682 virtio::Net::from(features, tap, vq_pairs).context("failed to create tap net device")
David Tolnay2b089fc2019-03-04 15:33:22 -0800683 })
684}
685
Alexandre Courbot993aa7f2021-12-09 14:51:29 +0900686/// Returns a network device created by opening the persistent, configured TAP interface `tap_name`.
687fn create_tap_net_device_from_name(cfg: &Config, tap_name: &[u8]) -> DeviceResult {
688 create_net_device(cfg, "net_device", |features, vq_pairs| {
689 virtio::Net::<Tap>::new_from_name(features, tap_name, vq_pairs)
690 .context("failed to create configured virtio network device")
691 })
692}
693
Keiichi Watanabe60686582021-03-12 04:53:51 +0900694fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
695 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700696 .context("failed to set up vhost-user net device")?;
Keiichi Watanabe60686582021-03-12 04:53:51 +0900697
698 Ok(VirtioDeviceStub {
699 dev: Box::new(dev),
700 // no sandbox here because virtqueue handling is exported to a different process.
701 jail: None,
702 })
703}
704
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900705fn create_vhost_user_vsock_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
706 let dev = VhostUserVsock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700707 .context("failed to set up vhost-user vsock device")?;
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900708
709 Ok(VirtioDeviceStub {
710 dev: Box::new(dev),
711 // no sandbox here because virtqueue handling is exported to a different process.
712 jail: None,
713 })
714}
715
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900716fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
717 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
718 // connection.
719 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700720 .context("failed to set up vhost-user wl device")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900721
722 Ok(VirtioDeviceStub {
723 dev: Box::new(dev),
724 // no sandbox here because virtqueue handling is exported to a different process.
725 jail: None,
726 })
727}
728
/// Creates a vhost-user GPU device that connects to the socket given in `opt.socket`.
///
/// `host_tube` and `device_tube` are passed straight to `VhostUserGpu::new`. The returned stub
/// has no jail.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);

    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .context("failed to set up vhost-user gpu device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        // no sandbox here because virtqueue handling is exported to a different process.
        jail: None,
    })
}
752
/// Mirror-mount all the directories in `dirs` into `jail` on a best-effort basis.
///
/// Each entry that exists on the host is bind-mounted at the same path inside the jail, passing
/// `false` as the last `mount_bind` argument. Entries that do not exist are silently skipped,
/// so a missing directory is not an error. A failing `mount_bind` call is still returned.
#[cfg(any(feature = "gpu", feature = "video-decoder", feature = "video-encoder"))]
fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
    jail: &mut Minijail,
    dirs: &[P],
) -> Result<()> {
    // The iterator is lazy, so each existence check still happens right before its mount.
    for present_dir in dirs.iter().map(Path::new).filter(|path| path.exists()) {
        jail.mount_bind(present_dir, present_dir, false)?;
    }

    Ok(())
}
770
/// Builds the jail shared by the GPU device and the GPU render server.
///
/// Starts from `simple_jail(cfg, policy)`. If that yields no jail, `Ok(None)` is returned.
/// Otherwise the jail gets a tmpfs root, the sysfs directories needed for DRM, the GPU-related
/// device nodes that exist on the host, the library and data directories used by dynamically
/// loaded mesa drivers, a read-only `/proc`, and the perfetto IPC directory when present.
#[cfg(feature = "gpu")]
fn gpu_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
    // Host paths bind-mounted only when they exist, with the `writable` flag to use for each.
    const OPTIONAL_DEV_NODES: &[(&str, bool)] = &[
        ("/dev/dri", false),
        // If the ARM specific devices exist on the host, bind mount them in.
        ("/dev/mali0", true),
        ("/dev/pvr_sync", true),
        // If the udmabuf driver exists on the host, bind mount it in.
        ("/dev/udmabuf", true),
    ];

    let mut jail = match simple_jail(cfg, policy)? {
        Some(jail) => jail,
        None => return Ok(None),
    };

    // Create a tmpfs in the device's root directory so that we can bind mount the
    // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
    let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
    jail.mount_with_data(
        Path::new("none"),
        Path::new("/"),
        "tmpfs",
        tmpfs_flags,
        "size=67108864",
    )?;

    // Device nodes required for DRM. These are mounted unconditionally.
    for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
        let sysfs_path = Path::new(sysfs_dir);
        jail.mount_bind(sysfs_path, sysfs_path, false)?;
    }

    for &(node, writable) in OPTIONAL_DEV_NODES {
        let node_path = Path::new(node);
        if node_path.exists() {
            jail.mount_bind(node_path, node_path, writable)?;
        }
    }

    // Libraries that are required when mesa drivers are dynamically loaded.
    jail_mount_bind_if_exists(
        &mut jail,
        &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/drirc.d",
            "/usr/share/glvnd",
            "/usr/share/vulkan",
        ],
    )?;

    // pvr driver requires read access to /proc/self/task/*/comm.
    let proc_path = Path::new("/proc");
    let proc_flags =
        (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
    jail.mount(proc_path, proc_path, "proc", proc_flags)?;

    // To enable perfetto tracing, we need to give access to the perfetto service IPC
    // endpoints.
    let perfetto_path = Path::new("/run/perfetto");
    if perfetto_path.exists() {
        jail.mount_bind(perfetto_path, perfetto_path, true)?;
    }

    Ok(Some(jail))
}
848
/// Shader-cache settings derived by `get_gpu_cache_info`.
#[cfg(feature = "gpu")]
struct GpuCacheInfo<'a> {
    /// Cache directory to use, or `None` when no cache directory should be used.
    directory: Option<&'a str>,
    /// `(name, value)` pairs of environment variables describing the cache configuration.
    environment: Vec<(&'a str, &'a str)>,
}
854
/// Works out the shader cache directory and the matching `MESA_GLSL_CACHE_*` variables.
///
/// * No `cache_dir`: no directory and no environment entries.
/// * `cache_dir` does not exist, or the build targets ARM/AArch64 with `sandbox` enabled: a
///   warning is logged, no directory is returned, and the environment holds only
///   `MESA_GLSL_CACHE_DISABLE=true`.
/// * Otherwise: the directory is returned, caching is enabled and pointed at the directory, and
///   `MESA_GLSL_CACHE_MAX_SIZE` is added when `cache_size` is given.
#[cfg(feature = "gpu")]
fn get_gpu_cache_info<'a>(
    cache_dir: Option<&'a String>,
    cache_size: Option<&'a String>,
    sandbox: bool,
) -> GpuCacheInfo<'a> {
    let requested_dir = match cache_dir {
        Some(path) => path.as_str(),
        None => {
            return GpuCacheInfo {
                directory: None,
                environment: Vec::new(),
            }
        }
    };

    let cache_usable = if !Path::new(requested_dir).exists() {
        warn!("shader caching dir {} does not exist", requested_dir);
        false
    } else if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && sandbox {
        warn!("shader caching not yet supported on ARM with sandbox enabled");
        false
    } else {
        true
    };

    if !cache_usable {
        return GpuCacheInfo {
            directory: None,
            environment: vec![("MESA_GLSL_CACHE_DISABLE", "true")],
        };
    }

    let mut environment = vec![
        ("MESA_GLSL_CACHE_DISABLE", "false"),
        ("MESA_GLSL_CACHE_DIR", requested_dir),
    ];
    // The size limit is optional; `extend` adds zero or one entry.
    environment.extend(cache_size.map(|size| ("MESA_GLSL_CACHE_MAX_SIZE", size.as_str())));

    GpuCacheInfo {
        directory: Some(requested_dir),
        environment,
    }
}
887
/// Creates the virtio GPU device together with its jail, when `gpu_jail` provides one.
///
/// The display backend list starts as X (using `x_display`) followed by the stub backend. If
/// `wayland_socket_path` is given, a Wayland backend for that socket is placed first.
///
/// When a jail is in use, the shader cache directory (if any) and the parent directory of every
/// entry in `cfg.wayland_socket_paths` are bind-mounted into it, and the current user is added
/// to the jail. The cache-related environment variables are set on the current process with
/// `env::set_var`.
///
/// # Errors
///
/// Returns an error if `cfg.gpu_parameters` is unset, if a wayland socket path has no parent
/// directory, if `exit_evt` cannot be cloned, or if any jail setup step fails.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    render_server_fd: Option<SafeDescriptor>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    // Look the parameters up once and report their absence as an error instead of panicking.
    let gpu_parameters = cfg
        .gpu_parameters
        .as_ref()
        .ok_or_else(|| anyhow!("gpu device requested without gpu parameters"))?;

    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, path)| path.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;

    if let Some(socket_path) = wayland_socket_path {
        display_backends.insert(
            0,
            virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
        );
    }

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().context("failed to clone event")?,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        gpu_parameters,
        render_server_fd,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = match gpu_jail(cfg, "gpu_device")? {
        Some(mut jail) => {
            // Prepare GPU shader disk cache directory.
            let cache_info = get_gpu_cache_info(
                gpu_parameters.cache_path.as_ref(),
                gpu_parameters.cache_size.as_ref(),
                cfg.sandbox,
            );

            if let Some(dir) = cache_info.directory {
                jail.mount_bind(dir, dir, true)?;
            }
            for (key, val) in cache_info.environment {
                env::set_var(key, val);
            }

            // Bind mount the wayland socket's directory into jail's root. This is necessary since
            // each new wayland context must open() the socket. If the wayland socket is ever
            // destroyed and remade in the same host directory, new connections will be possible
            // without restarting the wayland device.
            for dir in &wayland_socket_dirs {
                jail.mount_bind(dir, dir, true)?;
            }

            add_current_user_to_jail(&mut jail)?;

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
970
/// Builds the `KEY=VALUE` environment list for the GPU render server.
///
/// The entries from `cache_info.environment` come first. Every variable of the current process
/// environment follows, except those whose key is already supplied by `cache_info`.
///
/// Returns an error if any variable of the current environment has a key or value that is not
/// valid Unicode, even if that variable would have been skipped.
#[cfg(feature = "gpu")]
fn get_gpu_render_server_environment(cache_info: &GpuCacheInfo) -> Result<Vec<String>> {
    let overridden_keys: HashSet<&str> = cache_info
        .environment
        .iter()
        .map(|(key, _val)| *key)
        .collect();

    let mut env: Vec<String> = cache_info
        .environment
        .iter()
        .map(|(key, val)| format!("{}={}", key, val))
        .collect();

    for (key_os, val_os) in env::vars_os() {
        // minijail should accept OsStr rather than str...
        let (key, val) = match (key_os.into_string(), val_os.into_string()) {
            (Ok(key), Ok(val)) => (key, val),
            _ => return Err(anyhow!("invalid environment key/val")),
        };

        if overridden_keys.contains(key.as_str()) {
            continue;
        }
        env.push(format!("{}={}", key, val));
    }

    Ok(env)
}
994
/// Wrapper around a `Minijail` that calls `kill()` on it when dropped.
#[cfg(feature = "gpu")]
struct ScopedMinijail(Minijail);

#[cfg(feature = "gpu")]
impl Drop for ScopedMinijail {
    fn drop(&mut self) {
        let ScopedMinijail(jail) = self;
        // Best effort: the result of `kill()` is deliberately ignored.
        let _ = jail.kill();
    }
}
1004
/// Launches the GPU render server process and returns its jail and the client end of its socket.
///
/// A seqpacket socket pair is created. The server end's descriptor number is passed to the
/// render server via `--socket-fd`, and that descriptor is inherited along with stdout and
/// stderr. The client end is returned as a `SafeDescriptor`.
///
/// When `gpu_jail` yields a jail, the shader cache directory and `/dev/log` (if present) are
/// bind-mounted into it and the current user is mapped as root. Otherwise a fresh `Minijail` is
/// used. A custom environment is passed only when the cache configuration produced environment
/// entries.
#[cfg(feature = "gpu")]
fn start_gpu_render_server(
    cfg: &Config,
    render_server_parameters: &GpuRenderServerParameters,
) -> Result<(Minijail, SafeDescriptor)> {
    let (server_socket, client_socket) =
        UnixSeqpacket::pair().context("failed to create render server socket")?;

    let mut env: Option<Vec<String>> = None;
    let jail = if let Some(mut sandbox) = gpu_jail(cfg, "gpu_render_server")? {
        let cache_info = get_gpu_cache_info(
            render_server_parameters.cache_path.as_ref(),
            render_server_parameters.cache_size.as_ref(),
            cfg.sandbox,
        );

        if let Some(cache_dir) = cache_info.directory {
            sandbox.mount_bind(cache_dir, cache_dir, true)?;
        }

        if !cache_info.environment.is_empty() {
            env = Some(get_gpu_render_server_environment(&cache_info)?);
        }

        // bind mount /dev/log for syslog
        let log_path = Path::new("/dev/log");
        if log_path.exists() {
            sandbox.mount_bind(log_path, log_path, true)?;
        }

        // Run as root in the jail to keep capabilities after execve, which is needed for
        // mounting to work. All capabilities will be dropped afterwards.
        add_current_user_as_root_to_jail(&mut sandbox)?;

        sandbox
    } else {
        Minijail::new().context("failed to create jail")?
    };

    let server_fd = server_socket.as_raw_descriptor();
    let inheritable_fds = [server_fd, libc::STDOUT_FILENO, libc::STDERR_FILENO];

    let cmd = &render_server_parameters.path;
    let cmd_str = cmd
        .to_str()
        .ok_or_else(|| anyhow!("invalid render server path"))?;
    let fd_str = server_fd.to_string();
    let args = [cmd_str, "--socket-fd", &fd_str];

    // Borrowed view of the owned environment strings, in the form `new_for_path` takes.
    let envp: Option<Vec<&str>> = env
        .as_ref()
        .map(|vars| vars.iter().map(String::as_str).collect());

    let command =
        minijail::Command::new_for_path(cmd, &inheritable_fds, &args, envp.as_deref())?;
    jail.run_command(command)
        .context("failed to start gpu render server")?;

    Ok((jail, SafeDescriptor::from(client_socket)))
}
1073
David Tolnay2b089fc2019-03-04 15:33:22 -08001074fn create_wayland_device(
1075 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001076 control_tube: Tube,
1077 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001078) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001079 let wayland_socket_dirs = cfg
1080 .wayland_socket_paths
1081 .iter()
1082 .map(|(_name, path)| path.parent())
1083 .collect::<Option<Vec<_>>>()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001084 .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001085
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001086 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001087 let dev = virtio::Wl::new(
1088 features,
1089 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001090 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +01001091 resource_bridge,
1092 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001093 .context("failed to create wayland device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001094
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001095 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -08001096 Some(mut jail) => {
1097 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
1098 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
1099 jail.mount_with_data(
1100 Path::new("none"),
1101 Path::new("/"),
1102 "tmpfs",
1103 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1104 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -08001105 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001106
1107 // Bind mount the wayland socket's directory into jail's root. This is necessary since
1108 // each new wayland context must open() the socket. If the wayland socket is ever
1109 // destroyed and remade in the same host directory, new connections will be possible
1110 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001111 for dir in &wayland_socket_dirs {
1112 jail.mount_bind(dir, dir, true)?;
1113 }
Fergus Dall51200512021-08-19 12:54:26 +10001114 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001115
1116 Some(jail)
1117 }
1118 None => None,
1119 };
1120
1121 Ok(VirtioDeviceStub {
1122 dev: Box::new(dev),
1123 jail,
1124 })
1125}
1126
/// Creates a virtio video device of kind `typ` using `backend`, plus its jail when sandboxed.
///
/// When `simple_jail` yields a jail for the `video_device` policy, the current user is added to
/// it and a tmpfs root is created. The DRM render nodes are then bind-mounted for the libvda
/// backends, followed by the sysfs and library paths on x86/x86_64 builds, `/dev/urandom`, and
/// the D-Bus system bus socket.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    backend: VideoBackendType,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let jail = match simple_jail(cfg, "video_device")? {
        None => None,
        Some(mut jail) => {
            match typ {
                #[cfg(feature = "video-decoder")]
                devices::virtio::VideoDeviceType::Decoder => add_current_user_to_jail(&mut jail)?,
                #[cfg(feature = "video-encoder")]
                devices::virtio::VideoDeviceType::Encoder => add_current_user_to_jail(&mut jail)?,
            };

            // Create a tmpfs in the device's root directory so that we can bind mount files.
            let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                tmpfs_flags,
                "size=67108864",
            )?;

            #[cfg(feature = "libvda")]
            // Render node for libvda.
            if backend == VideoBackendType::Libvda || backend == VideoBackendType::LibvdaVd {
                // follow the implementation at:
                // https://source.corp.google.com/chromeos_public/src/platform/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=90;bpv=0;cl=c06cc9cccb3cf3c7f9d2aec706c27c34cd6162a0
                const DRM_NUM_NODES: u32 = 63;
                const DRM_RENDER_NODE_START: u32 = 128;
                let render_nodes = DRM_RENDER_NODE_START..DRM_RENDER_NODE_START + DRM_NUM_NODES;
                for node in render_nodes {
                    let node_name = format!("/dev/dri/renderD{}", node);
                    let node_path = Path::new(&node_name);
                    // Stop at the first missing node; higher-numbered nodes are not probed.
                    if !node_path.exists() {
                        break;
                    }
                    jail.mount_bind(node_path, node_path, false)?;
                }
            }

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                // Device nodes used by libdrm through minigbm in libvda on AMD devices.
                for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
                    let sysfs_path = Path::new(sysfs_dir);
                    jail.mount_bind(sysfs_path, sysfs_path, false)?;
                }

                // Required for loading dri libraries loaded by minigbm on AMD devices.
                jail_mount_bind_if_exists(&mut jail, &["/usr/lib64"])?;
            }

            // Device nodes required by libchrome which establishes Mojo connection in libvda.
            let urandom = Path::new("/dev/urandom");
            jail.mount_bind(urandom, urandom, false)?;
            let dbus_socket = Path::new("/run/dbus/system_bus_socket");
            jail.mount_bind(dbus_socket, dbus_socket, true)?;

            Some(jail)
        }
    };

    let dev = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        backend,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
1202
/// Creates a video device with `create_video_device` and appends it to `devs`.
///
/// `video_tube` is passed on as the device's resource bridge. On failure `devs` is left
/// untouched and the error is returned.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    backend: VideoBackendType,
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> Result<()> {
    let video_stub = create_video_device(backend, cfg, typ, video_tube)?;
    devs.push(video_stub);
    Ok(())
}
1214
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001215fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001216 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann50f95912021-11-05 16:59:39 +01001217
1218 let device_file = match cfg
1219 .vhost_vsock_device
1220 .as_ref()
1221 .unwrap_or(&VhostVsockDeviceParameter::default())
1222 {
1223 VhostVsockDeviceParameter::Fd(fd) => {
1224 let fd = validate_raw_descriptor(*fd)
1225 .context("failed to validate fd for virtual socker device")?;
1226 // Safe because the `fd` is actually owned by this process and
1227 // we have a unique handle to it.
1228 unsafe { File::from_raw_fd(fd) }
1229 }
1230 VhostVsockDeviceParameter::Path(path) => OpenOptions::new()
1231 .read(true)
1232 .write(true)
1233 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
1234 .open(path)
1235 .context("failed to open virtual socket device")?,
1236 };
1237
1238 let dev = virtio::vhost::Vsock::new(device_file, features, cid)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001239 .context("failed to set up virtual socket device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001240
1241 Ok(VirtioDeviceStub {
1242 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001243 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001244 })
1245}
1246
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001247fn create_fs_device(
1248 cfg: &Config,
1249 uid_map: &str,
1250 gid_map: &str,
1251 src: &Path,
1252 tag: &str,
1253 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001254 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001255) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001256 let max_open_files =
1257 base::get_max_open_files().context("failed to get max number of open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001258 let j = if cfg.sandbox {
1259 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1260 let config = SandboxConfig {
1261 limit_caps: false,
1262 uid_map: Some(uid_map),
1263 gid_map: Some(gid_map),
1264 log_failures: cfg.seccomp_log_failures,
1265 seccomp_policy: &seccomp_policy,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001266 // We want bind mounts from the parent namespaces to propagate into the fs device's
1267 // namespace.
1268 remount_mode: Some(libc::MS_SLAVE),
Matt Delcoc24ad782020-02-14 13:24:36 -08001269 };
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001270 create_base_minijail(src, Some(max_open_files), Some(&config))?
Matt Delcoc24ad782020-02-14 13:24:36 -08001271 } else {
1272 create_base_minijail(src, Some(max_open_files), None)?
1273 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001274
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001275 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001276 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1277 // when num_queues > 1.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001278 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1279 .context("failed to create fs device")?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001280
1281 Ok(VirtioDeviceStub {
1282 dev: Box::new(dev),
1283 jail: Some(j),
1284 })
1285}
1286
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001287fn create_9p_device(
1288 cfg: &Config,
1289 uid_map: &str,
1290 gid_map: &str,
1291 src: &Path,
1292 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001293 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001294) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001295 let max_open_files =
1296 base::get_max_open_files().context("failed to get max number of open files")?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001297 let (jail, root) = if cfg.sandbox {
1298 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1299 let config = SandboxConfig {
1300 limit_caps: false,
1301 uid_map: Some(uid_map),
1302 gid_map: Some(gid_map),
1303 log_failures: cfg.seccomp_log_failures,
1304 seccomp_policy: &seccomp_policy,
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001305 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1306 // namespace.
1307 remount_mode: Some(libc::MS_SLAVE),
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001308 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001309
Dmitry Torokhov2e6e61d2022-01-24 13:39:09 -08001310 let jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
Chirantan Ekbote055de382020-01-24 12:16:58 +09001311
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001312 // The shared directory becomes the root of the device's file system.
1313 let root = Path::new("/");
1314 (Some(jail), root)
1315 } else {
1316 // There's no mount namespace so we tell the server to treat the source directory as the
1317 // root.
1318 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001319 };
1320
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001321 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001322 p9_cfg.root = root.into();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001323 let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001324
1325 Ok(VirtioDeviceStub {
1326 dev: Box::new(dev),
1327 jail,
1328 })
1329}
1330
Jakub Starona3411ea2019-04-24 10:55:25 -07001331fn create_pmem_device(
1332 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001333 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001334 resources: &mut SystemAllocator,
1335 disk: &DiskOption,
1336 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001337 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001338) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001339 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001340 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001341
1342 let (disk_size, arena_size) = {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001343 let metadata = std::fs::metadata(&disk.path).with_context(|| {
1344 format!("failed to get disk image {} metadata", disk.path.display())
1345 })?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001346 let disk_len = metadata.len();
1347 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1348 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1349 // we just align the size of the file to 2 MiB then access beyond the last page of the
1350 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1351 // padding up to 2 MiB.
1352 let alignment = 2 * 1024 * 1024;
1353 let align_adjust = if disk_len % alignment != 0 {
1354 alignment - (disk_len % alignment)
1355 } else {
1356 0
1357 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001358 (
1359 disk_len,
1360 disk_len
1361 .checked_add(align_adjust)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001362 .ok_or_else(|| anyhow!("pmem device image too big"))?,
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001363 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001364 };
1365
1366 let protection = {
1367 if disk.read_only {
1368 Protection::read()
1369 } else {
1370 Protection::read_write()
1371 }
1372 };
1373
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001374 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001375 // Conversion from u64 to usize may fail on 32bit system.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001376 let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1377 let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001378
Daniel Verkamp6b298582021-08-16 15:37:11 -07001379 let mut arena =
1380 MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001381 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001382 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001383 .context("failed to reserve pmem memory")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001384
1385 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1386 // of the page with zeroes. However, the anonymous mapping added below must start on a
1387 // page boundary, so round up the size before calculating the offset of the anon region.
1388 let disk_size = round_up_to_page_size(disk_size);
1389
1390 if arena_size > disk_size {
1391 // Add an anonymous region with the same protection as the disk mapping if the arena
1392 // size was aligned.
1393 arena
1394 .add_anon_protection(disk_size, arena_size - disk_size, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001395 .context("failed to reserve pmem padding")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001396 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001397 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001398 };
1399
1400 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001401 .mmio_allocator(MmioType::High)
Daniel Verkamp57e4f542021-10-28 09:56:40 -07001402 .reverse_allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001403 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001404 Alloc::PmemDevice(index),
1405 format!("pmem_disk_image_{}", index),
1406 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1407 128 * 1024 * 1024, /* 128 MiB */
1408 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001409 .context("failed to allocate memory for pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001410
Daniel Verkampe1980a92020-02-07 11:00:55 -08001411 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001412 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001413 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001414 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001415 /* read_only = */ disk.read_only,
1416 /* log_dirty_pages = */ false,
1417 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001418 .context("failed to add pmem device memory")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001419
Daniel Verkampe1980a92020-02-07 11:00:55 -08001420 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001421 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001422 fd,
1423 GuestAddress(mapping_address),
1424 slot,
1425 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001426 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001427 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001428 .context("failed to create pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001429
1430 Ok(VirtioDeviceStub {
1431 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001432 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001433 })
1434}
1435
Zide Chendfc4b882021-03-10 16:35:37 -08001436fn create_iommu_device(
1437 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001438 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001439 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1440) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001441 let dev = virtio::Iommu::new(
1442 virtio::base_features(cfg.protected_vm),
1443 endpoints,
1444 phys_max_addr,
1445 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001446 .context("failed to create IOMMU device")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001447
1448 Ok(VirtioDeviceStub {
1449 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001450 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001451 })
1452}
1453
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001454fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001455 let mut keep_rds = Vec::new();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001456 let evt = Event::new().context("failed to create event")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001457 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001458 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001459 .context("failed to create console device")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001460
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001461 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001462 Some(mut jail) => {
1463 // Create a tmpfs in the device's root directory so that we can bind mount the
1464 // log socket directory into it.
1465 // The size=67108864 is size=64*1024*1024 or size=64MB.
1466 jail.mount_with_data(
1467 Path::new("none"),
1468 Path::new("/"),
1469 "tmpfs",
1470 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1471 "size=67108864",
1472 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001473 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001474 let res = param.add_bind_mounts(&mut jail);
1475 if res.is_err() {
1476 error!("failed to add bind mounts for console device");
1477 }
1478 Some(jail)
1479 }
1480 None => None,
1481 };
1482
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001483 Ok(VirtioDeviceStub {
1484 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001485 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001486 })
1487}
1488
/// Creates a virtio sound device for `path` (only built with the "audio" feature).
///
/// The device is paired with the jail produced by `simple_jail` for the "vios_audio_device"
/// policy.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).context("failed to create sound device")?;
    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1499
/// Builds the list of virtio device stubs requested by `cfg`.
///
/// Devices are appended to the returned vector in the order the sections below appear. Several
/// sections are compiled only when the matching cargo feature ("tpm", "gpu", "audio",
/// "audio_cras", "video-decoder", "video-encoder") is enabled.
///
/// One tube is taken from the front of `disk_device_tubes`, `pmem_device_tubes` and
/// `fs_device_tubes` for every disk, pmem device and `SharedDirKind::FS` shared directory
/// respectively; `Vec::remove(0)` panics if the corresponding vector is empty, so callers must
/// supply at least as many tubes as there are such devices.
///
/// Returns an error as soon as any individual device fails to be created, or if vhost-user-net
/// is combined with a host IP / netmask / MAC configuration.
// gpu_device_tube is not used when GPU support is disabled.
#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
fn create_virtio_devices(
    cfg: &Config,
    vm: &mut impl Vm,
    resources: &mut SystemAllocator,
    _exit_evt: &Event,
    wayland_device_tube: Tube,
    gpu_device_tube: Tube,
    vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
    balloon_device_tube: Tube,
    balloon_inflate_tube: Option<Tube>,
    disk_device_tubes: &mut Vec<Tube>,
    pmem_device_tubes: &mut Vec<Tube>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    fs_device_tubes: &mut Vec<Tube>,
    #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // Only serial ports configured as virtio consoles become virtio devices here.
    for (_, param) in cfg
        .serial_parameters
        .iter()
        .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
    {
        let dev = create_console_device(cfg, param)?;
        devs.push(dev);
    }

    for disk in &cfg.disks {
        // Tubes are consumed front-to-back, one per configured disk.
        let disk_device_tube = disk_device_tubes.remove(0);
        devs.push(create_block_device(cfg, disk, disk_device_tube)?);
    }

    for blk in &cfg.vhost_user_blk {
        devs.push(create_vhost_user_block_device(cfg, blk)?);
    }

    for console in &cfg.vhost_user_console {
        devs.push(create_vhost_user_console_device(cfg, console)?);
    }

    for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
        // Tubes are consumed front-to-back, one per configured pmem device.
        let pmem_device_tube = pmem_device_tubes.remove(0);
        devs.push(create_pmem_device(
            cfg,
            vm,
            resources,
            pmem_disk,
            index,
            pmem_device_tube,
        )?);
    }

    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "tpm")]
    {
        if cfg.software_tpm {
            devs.push(create_tpm_device(cfg)?);
        }
    }

    // Input devices: each kind is numbered independently, starting at 0.
    for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
        devs.push(create_single_touch_device(
            cfg,
            single_touch_spec,
            idx as u32,
        )?);
    }

    for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
        devs.push(create_multi_touch_device(
            cfg,
            multi_touch_spec,
            idx as u32,
        )?);
    }

    for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
        devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
    }

    for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
        devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
    }

    for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
        devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
    }

    for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
        devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
    }

    for dev_path in &cfg.virtio_input_evdevs {
        devs.push(create_vinput_device(cfg, dev_path)?);
    }

    devs.push(create_balloon_device(
        cfg,
        balloon_device_tube,
        balloon_inflate_tube,
    )?);

    // We checked above that if the IP is defined, then the netmask is, too.
    for tap_fd in &cfg.tap_fd {
        devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
    }

    // A net device is created from the config only when host IP, netmask and MAC are all set.
    if let (Some(host_ip), Some(netmask), Some(mac_address)) =
        (cfg.host_ip, cfg.netmask, cfg.mac_address)
    {
        if !cfg.vhost_user_net.is_empty() {
            bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
        }
        devs.push(create_net_device_from_config(
            cfg,
            host_ip,
            netmask,
            mac_address,
        )?);
    }

    for tap_name in &cfg.tap_name {
        devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
    }

    for net in &cfg.vhost_user_net {
        devs.push(create_vhost_user_net_device(cfg, net)?);
    }

    for vsock in &cfg.vhost_user_vsock {
        devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
    }

    for opt in &cfg.vhost_user_wl {
        devs.push(create_vhost_user_wl_device(cfg, opt)?);
    }

    // `zip` stops at the shorter side, so options without a matching tube pair (or vice versa)
    // are silently left out.
    #[cfg(feature = "gpu")]
    for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
        devs.push(create_vhost_user_gpu_device(
            cfg,
            opt,
            host_tube,
            device_tube,
        )?);
    }

    for opt in &cfg.vvu_proxy {
        devs.push(create_vvu_proxy_device(cfg, opt)?);
    }

    // Collects the GPU-side ends of the tube pairs created below for the wayland and video
    // devices; the whole vector is handed to `create_gpu_device`.
    #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
    let mut resource_bridges = Vec::<Tube>::new();

    if !cfg.wayland_socket_paths.is_empty() {
        #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
        let mut wl_resource_bridge = None::<Tube>;

        #[cfg(feature = "gpu")]
        {
            if cfg.gpu_parameters.is_some() {
                let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
                resource_bridges.push(gpu_socket);
                wl_resource_bridge = Some(wl_socket);
            }
        }

        devs.push(create_wayland_device(
            cfg,
            wayland_device_tube,
            wl_resource_bridge,
        )?);
    }

    // The video tube pairs are created before the GPU device so their GPU-side ends are already
    // in `resource_bridges` when it is moved into `create_gpu_device`; the video devices
    // themselves are registered further down.
    #[cfg(feature = "video-decoder")]
    let video_dec_cfg = if let Some(backend) = cfg.video_dec {
        let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
        resource_bridges.push(gpu_tube);
        Some((video_tube, backend))
    } else {
        None
    };

    #[cfg(feature = "video-encoder")]
    let video_enc_cfg = if let Some(backend) = cfg.video_enc {
        let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
        resource_bridges.push(gpu_tube);
        Some((video_tube, backend))
    } else {
        None
    };

    #[cfg(feature = "gpu")]
    {
        if let Some(gpu_parameters) = &cfg.gpu_parameters {
            // Default display size, overridden by the first configured display if there is one.
            let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
            let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
            if !gpu_parameters.displays.is_empty() {
                gpu_display_w = gpu_parameters.displays[0].width;
                gpu_display_h = gpu_parameters.displays[0].height;
            }

            let mut event_devices = Vec::new();
            if cfg.display_window_mouse {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().context("failed to create socket")?;
                // Size the touchscreen after the first multi-touch spec, falling back to the
                // display size chosen above.
                let (multi_touch_width, multi_touch_height) = cfg
                    .virtio_multi_touch
                    .first()
                    .as_ref()
                    .map(|multi_touch_spec| multi_touch_spec.get_size())
                    .unwrap_or((gpu_display_w, gpu_display_h));
                let dev = virtio::new_multi_touch(
                    // u32::MAX is the least likely to collide with the indices generated above for
                    // the multi_touch options, which begin at 0.
                    u32::MAX,
                    virtio_dev_socket,
                    multi_touch_width,
                    multi_touch_height,
                    virtio::base_features(cfg.protected_vm),
                )
                .context("failed to set up mouse device")?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(cfg, "input_device")?,
                });
                event_devices.push(EventDevice::touchscreen(event_device_socket));
            }
            if cfg.display_window_keyboard {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().context("failed to create socket")?;
                let dev = virtio::new_keyboard(
                    // u32::MAX is the least likely to collide with the indices generated above for
                    // the keyboard options, which begin at 0.
                    u32::MAX,
                    virtio_dev_socket,
                    virtio::base_features(cfg.protected_vm),
                )
                .context("failed to set up keyboard device")?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(cfg, "input_device")?,
                });
                event_devices.push(EventDevice::keyboard(event_device_socket));
            }

            devs.push(create_gpu_device(
                cfg,
                _exit_evt,
                gpu_device_tube,
                resource_bridges,
                // Use the unnamed socket for GPU display screens.
                cfg.wayland_socket_paths.get(""),
                cfg.x_display.clone(),
                render_server_fd,
                event_devices,
                map_request,
            )?);
        }
    }

    #[cfg(feature = "audio_cras")]
    {
        for cras_snd in &cfg.cras_snds {
            devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
        }
    }

    #[cfg(feature = "video-decoder")]
    {
        if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
            register_video_device(
                video_dec_backend,
                &mut devs,
                video_dec_tube,
                cfg,
                devices::virtio::VideoDeviceType::Decoder,
            )?;
        }
    }

    #[cfg(feature = "video-encoder")]
    {
        if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
            register_video_device(
                video_enc_backend,
                &mut devs,
                video_enc_tube,
                cfg,
                devices::virtio::VideoDeviceType::Encoder,
            )?;
        }
    }

    if let Some(cid) = cfg.cid {
        devs.push(create_vhost_vsock_device(cfg, cid)?);
    }

    for vhost_user_fs in &cfg.vhost_user_fs {
        devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
    }

    #[cfg(feature = "audio")]
    for vhost_user_snd in &cfg.vhost_user_snd {
        devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
    }

    for shared_dir in &cfg.shared_dirs {
        let SharedDir {
            src,
            tag,
            kind,
            uid_map,
            gid_map,
            fs_cfg,
            p9_cfg,
        } = shared_dir;

        let dev = match kind {
            SharedDirKind::FS => {
                // Only FS-kind shared directories consume a tube; 9p ones do not.
                let device_tube = fs_device_tubes.remove(0);
                create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
            }
            SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
        };
        devs.push(dev);
    }

    if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
        devs.push(create_vhost_user_mac80211_hwsim_device(
            cfg,
            vhost_user_mac80211_hwsim,
        )?);
    }

    #[cfg(feature = "audio")]
    if let Some(path) = &cfg.sound {
        devs.push(create_sound_device(path, cfg)?);
    }

    Ok(devs)
}
1845
Xiong Zhang10f15052021-04-08 17:23:33 +08001846fn create_vfio_device(
1847 cfg: &Config,
1848 vm: &impl Vm,
1849 resources: &mut SystemAllocator,
1850 control_tubes: &mut Vec<TaggedControlTube>,
1851 vfio_path: &Path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001852 bus_num: Option<u8>,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001853 iommu_endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1854 coiommu_endpoints: Option<&mut Vec<u16>>,
1855 iommu_dev: IommuDevType,
Xiong Zhang10f15052021-04-08 17:23:33 +08001856) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001857 let vfio_container = VfioCommonSetup::vfio_get_container(iommu_dev, Some(vfio_path))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001858 .context("failed to get vfio container")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001859
1860 // create MSI, MSI-X, and Mem request sockets for each vfio device
Daniel Verkamp6b298582021-08-16 15:37:11 -07001861 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1862 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001863 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1864
Daniel Verkamp6b298582021-08-16 15:37:11 -07001865 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1866 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001867 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1868
Daniel Verkamp6b298582021-08-16 15:37:11 -07001869 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1870 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001871 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1872
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001873 let hotplug = bus_num.is_some();
Xiong Zhang81ae6f32021-06-26 00:16:00 +08001874 let vfio_device_tube_vm = if hotplug {
1875 let (vfio_host_tube_vm, device_tube_vm) = Tube::pair().context("failed to create tube")?;
1876 control_tubes.push(TaggedControlTube::Vm(vfio_host_tube_vm));
1877 Some(device_tube_vm)
1878 } else {
1879 None
1880 };
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001881
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001882 let vfio_device = VfioDevice::new_passthrough(
1883 &vfio_path,
1884 vm,
1885 vfio_container.clone(),
1886 iommu_dev != IommuDevType::NoIommu,
1887 )
1888 .context("failed to create vfio device")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001889 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1890 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001891 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001892 vfio_device_tube_msi,
1893 vfio_device_tube_msix,
1894 vfio_device_tube_mem,
Xiong Zhang81ae6f32021-06-26 00:16:00 +08001895 vfio_device_tube_vm,
Xiong Zhang10f15052021-04-08 17:23:33 +08001896 ));
1897 // early reservation for pass-through PCI devices.
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001898 let endpoint_addr = vfio_pci_device
1899 .allocate_address(resources)
1900 .context("failed to allocate resources early for vfio pci dev")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001901
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001902 match iommu_dev {
1903 IommuDevType::NoIommu => {}
1904 IommuDevType::VirtioIommu => {
1905 iommu_endpoints.insert(endpoint_addr.to_u32(), vfio_container);
1906 }
1907 IommuDevType::CoIommu => {
1908 if let Some(endpoints) = coiommu_endpoints {
1909 endpoints.push(endpoint_addr.to_u32() as u16);
1910 } else {
1911 bail!("Missed coiommu_endpoints vector to store the endpoint addr");
1912 }
1913 }
Zide Chendfc4b882021-03-10 16:35:37 -08001914 }
1915
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001916 if hotplug {
1917 Ok((vfio_pci_device, None))
1918 } else {
1919 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1920 }
Xiong Zhang10f15052021-04-08 17:23:33 +08001921}
1922
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001923fn create_vfio_platform_device(
1924 cfg: &Config,
1925 vm: &impl Vm,
1926 _resources: &mut SystemAllocator,
1927 control_tubes: &mut Vec<TaggedControlTube>,
1928 vfio_path: &Path,
1929 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001930 iommu_dev: IommuDevType,
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001931) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001932 let vfio_container = VfioCommonSetup::vfio_get_container(iommu_dev, Some(vfio_path))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001933 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001934
Daniel Verkamp6b298582021-08-16 15:37:11 -07001935 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1936 Tube::pair().context("failed to create tube")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001937 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1938
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001939 let vfio_device = VfioDevice::new_passthrough(
1940 &vfio_path,
1941 vm,
1942 vfio_container,
1943 iommu_dev != IommuDevType::NoIommu,
1944 )
1945 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001946 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1947
1948 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1949}
1950
David Tolnay2b089fc2019-03-04 15:33:22 -08001951fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001952 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001953 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001954 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001955 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001956 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001957 control_tubes: &mut Vec<TaggedControlTube>,
1958 wayland_device_tube: Tube,
1959 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001960 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001961 balloon_device_tube: Tube,
1962 disk_device_tubes: &mut Vec<Tube>,
1963 pmem_device_tubes: &mut Vec<Tube>,
1964 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001965 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001966 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001967 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001968) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08001969 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
1970 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -07001971 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001972 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1973 BTreeMap::new();
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001974 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -08001975
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001976 for vfio_dev in cfg
1977 .vfio
1978 .iter()
1979 .filter(|dev| dev.get_type() == VfioType::Pci)
1980 {
1981 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001982 let (vfio_pci_device, jail) = create_vfio_device(
1983 cfg,
1984 vm,
1985 resources,
1986 control_tubes,
1987 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001988 None,
Zide Chendfc4b882021-03-10 16:35:37 -08001989 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001990 Some(&mut coiommu_attached_endpoints),
1991 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -07001992 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001993
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001994 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07001995 }
Zide Chendfc4b882021-03-10 16:35:37 -08001996
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001997 for vfio_dev in cfg
1998 .vfio
1999 .iter()
2000 .filter(|dev| dev.get_type() == VfioType::Platform)
2001 {
2002 let vfio_path = &vfio_dev.vfio_path;
2003 let (vfio_plat_dev, jail) = create_vfio_platform_device(
2004 cfg,
2005 vm,
2006 resources,
2007 control_tubes,
2008 vfio_path.as_path(),
2009 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002010 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +00002011 )?;
2012
2013 devices.push((Box::new(vfio_plat_dev), jail));
2014 }
2015
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08002016 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
2017 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
2018 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
2019 if res == 0 {
2020 let limit = unsafe { buf.assume_init() };
2021 let rlim_new = limit
2022 .rlim_cur
2023 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
2024 let rlim_max = max(limit.rlim_max, rlim_new);
2025 if limit.rlim_cur < rlim_new {
2026 let limit_arg = libc::rlimit {
2027 rlim_cur: rlim_new as libc::rlim_t,
2028 rlim_max: rlim_max as libc::rlim_t,
2029 };
2030 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
2031 if res != 0 {
2032 bail!("Set rlimit failed");
2033 }
2034 }
2035 } else {
2036 bail!("Get rlimit failed");
2037 }
2038 }
2039
Zide Chendfc4b882021-03-10 16:35:37 -08002040 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08002041 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08002042
Daniel Verkamp6b298582021-08-16 15:37:11 -07002043 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -08002044 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07002045 let mut dev =
2046 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002047 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07002048 // early reservation for viommu.
2049 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002050 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07002051 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002052 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08002053 }
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002054
2055 if !coiommu_attached_endpoints.is_empty() {
2056 let vfio_container =
2057 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
2058 .context("failed to get vfio container")?;
2059 let (coiommu_host_tube, coiommu_device_tube) =
2060 Tube::pair().context("failed to create coiommu tube")?;
2061 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
2062 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002063 let (coiommu_tube, balloon_tube) =
2064 Tube::pair().context("failed to create coiommu tube")?;
2065 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002066 let dev = CoIommuDev::new(
2067 vm.get_memory().clone(),
2068 vfio_container,
2069 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002070 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002071 coiommu_attached_endpoints,
2072 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +08002073 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08002074 )
2075 .context("failed to create coiommu device")?;
2076
2077 devices.push((Box::new(dev), simple_jail(cfg, "coiommu")?));
2078 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08002079 }
2080
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002081 let stubs = create_virtio_devices(
2082 cfg,
2083 vm,
2084 resources,
2085 exit_evt,
2086 wayland_device_tube,
2087 gpu_device_tube,
2088 vhost_user_gpu_tubes,
2089 balloon_device_tube,
2090 balloon_inflate_tube,
2091 disk_device_tubes,
2092 pmem_device_tubes,
2093 map_request,
2094 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08002095 #[cfg(feature = "gpu")]
2096 render_server_fd,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +08002097 )?;
2098
2099 for stub in stubs {
2100 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2101 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
2102 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
2103 .context("failed to create virtio pci dev")?;
2104 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
2105 devices.push((dev, stub.jail));
2106 }
2107
2108 #[cfg(feature = "audio")]
2109 for ac97_param in &cfg.ac97_parameters {
2110 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
2111 .context("failed to create ac97 device")?;
2112 let jail = simple_jail(cfg, dev.minijail_policy())?;
2113 devices.push((Box::new(dev), jail));
2114 }
2115
2116 #[cfg(feature = "usb")]
2117 {
2118 // Create xhci controller.
2119 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
2120 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
2121 }
2122
Mattias Nisslerde2c6402021-10-21 12:05:29 +00002123 for params in &cfg.stub_pci_devices {
2124 // Stub devices don't need jailing since they don't do anything.
2125 devices.push((Box::new(StubPciDevice::new(params)), None));
2126 }
2127
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002128 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08002129}
2130
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00002131fn create_file_backed_mappings(
2132 cfg: &Config,
2133 vm: &mut impl Vm,
2134 resources: &mut SystemAllocator,
2135) -> Result<()> {
2136 for mapping in &cfg.file_backed_mappings {
2137 let file = OpenOptions::new()
2138 .read(true)
2139 .write(mapping.writable)
2140 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
2141 .open(&mapping.path)
2142 .context("failed to open file for file-backed mapping")?;
2143 let prot = if mapping.writable {
2144 Protection::read_write()
2145 } else {
2146 Protection::read()
2147 };
2148 let size = mapping
2149 .size
2150 .try_into()
2151 .context("Invalid size for file-backed mapping")?;
2152 let memory_mapping = MemoryMappingBuilder::new(size)
2153 .from_file(&file)
2154 .offset(mapping.offset)
2155 .protection(prot)
2156 .build()
2157 .context("failed to map backing file for file-backed mapping")?;
2158
2159 resources
2160 .mmio_allocator_any()
2161 .allocate_at(
2162 mapping.address,
2163 mapping.size,
2164 Alloc::FileBacked(mapping.address),
2165 "file-backed mapping".to_owned(),
2166 )
2167 .context("failed to allocate guest address for file-backed mapping")?;
2168
2169 vm.add_memory_region(
2170 GuestAddress(mapping.address),
2171 Box::new(memory_mapping),
2172 !mapping.writable,
2173 /* log_dirty_pages = */ false,
2174 )
2175 .context("failed to configure file-backed mapping")?;
2176 }
2177
2178 Ok(())
2179}
2180
David Tolnay2b089fc2019-03-04 15:33:22 -08002181#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09002182#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08002183struct Ids {
2184 uid: uid_t,
2185 gid: gid_t,
2186}
2187
David Tolnay48c48292019-03-01 16:54:25 -08002188// Set the uid/gid for the jailed process and give a basic id map. This is
2189// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10002190fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
2191 let crosvm_uid = geteuid();
2192 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08002193
David Tolnay48c48292019-03-01 16:54:25 -08002194 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002195 .context("error setting UID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08002196 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002197 .context("error setting GID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08002198
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09002199 if crosvm_uid != 0 {
2200 jail.change_uid(crosvm_uid);
2201 }
2202 if crosvm_gid != 0 {
2203 jail.change_gid(crosvm_gid);
2204 }
Fergus Dall51200512021-08-19 12:54:26 +10002205
David Tolnay41a6f842019-03-01 16:18:44 -08002206 Ok(Ids {
2207 uid: crosvm_uid,
2208 gid: crosvm_gid,
2209 })
David Tolnay48c48292019-03-01 16:54:25 -08002210}
2211
Chia-I Wu16fb6592021-11-10 11:45:32 -08002212fn add_current_user_as_root_to_jail(jail: &mut Minijail) -> Result<Ids> {
2213 let crosvm_uid = geteuid();
2214 let crosvm_gid = getegid();
2215 jail.uidmap(&format!("0 {0} 1", crosvm_uid))
2216 .context("error setting UID map")?;
2217 jail.gidmap(&format!("0 {0} 1", crosvm_gid))
2218 .context("error setting GID map")?;
2219
2220 Ok(Ids {
2221 uid: crosvm_uid,
2222 gid: crosvm_gid,
2223 })
2224}
2225
Zach Reizner65b98f12019-11-22 17:34:58 -08002226trait IntoUnixStream {
2227 fn into_unix_stream(self) -> Result<UnixStream>;
2228}
2229
2230impl<'a> IntoUnixStream for &'a Path {
2231 fn into_unix_stream(self) -> Result<UnixStream> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002232 if let Some(fd) = safe_descriptor_from_path(self).context("failed to open event device")? {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002233 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08002234 } else {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002235 UnixStream::connect(self).context("failed to open event device")
Zach Reizner65b98f12019-11-22 17:34:58 -08002236 }
2237 }
2238}
2239impl<'a> IntoUnixStream for &'a PathBuf {
2240 fn into_unix_stream(self) -> Result<UnixStream> {
2241 self.as_path().into_unix_stream()
2242 }
2243}
2244
2245impl IntoUnixStream for UnixStream {
2246 fn into_unix_stream(self) -> Result<UnixStream> {
2247 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08002248 }
2249}
2250
Steven Richmanf32d0b42020-06-20 21:45:32 -07002251fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
2252 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07002253 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002254 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07002255 // Our signal handler does nothing and is trivially async signal safe.
2256 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002257 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002258 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002259 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002260 } else {
2261 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002262 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002263 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07002264 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002265 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002266 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002267 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002268 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002269 Ok(())
2270}
2271
Steven Richmanf32d0b42020-06-20 21:45:32 -07002272// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07002273fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002274 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002275 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002276 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002277 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07002278 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002279 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002280 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002281 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002282 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002283 has_bios: bool,
2284 use_hypervisor_signals: bool,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002285 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002286 host_cpu_topology: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07002287) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07002288where
Zach Reizner2c770e62020-09-30 16:49:59 -07002289 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002290{
Zach Reizner304e7312020-09-29 16:00:24 -07002291 let mut vcpu = match vcpu {
2292 Some(v) => v,
2293 None => {
2294 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
2295 // the vcpu thread.
2296 match vm
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002297 .create_vcpu(kvm_vcpu_id)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002298 .context("failed to create vcpu")?
Zach Reizner304e7312020-09-29 16:00:24 -07002299 .downcast::<V>()
2300 {
2301 Ok(v) => *v,
2302 Err(_) => panic!("VM created wrong type of VCPU"),
2303 }
2304 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002305 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002306
Steven Richmanf32d0b42020-06-20 21:45:32 -07002307 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07002308 .add_vcpu(cpu_id, &vcpu)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002309 .context("failed to add vcpu to irq chip")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002310
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07002311 if !vcpu_affinity.is_empty() {
2312 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
2313 error!("Failed to set CPU affinity: {}", e);
2314 }
2315 }
2316
Steven Richmanf32d0b42020-06-20 21:45:32 -07002317 Arch::configure_vcpu(
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08002318 &vm,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002319 vm.get_hypervisor(),
2320 irq_chip,
2321 &mut vcpu,
2322 cpu_id,
2323 vcpu_count,
2324 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002325 no_smt,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002326 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002327 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002328 .context("failed to configure vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002329
Yusuke Sato31e136a2021-08-18 11:51:38 -07002330 if !enable_per_vm_core_scheduling {
2331 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
2332 if let Err(e) = enable_core_scheduling() {
2333 error!("Failed to enable core scheduling: {}", e);
2334 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002335 }
2336
Kansho Nishidaab205af2020-08-13 18:17:50 +09002337 if run_rt {
2338 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2339 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
2340 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
2341 {
2342 warn!("Failed to set vcpu to real time: {}", e);
2343 }
2344 }
2345
Steven Richmanf32d0b42020-06-20 21:45:32 -07002346 if use_hypervisor_signals {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002347 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002348 v.retain(|&x| x != SIGRTMIN() + 0);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002349 vcpu.set_signal_mask(&v)
2350 .context("failed to set the signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002351 }
2352
Zach Reizner2c770e62020-09-30 16:49:59 -07002353 let vcpu_run_handle = vcpu
2354 .take_run_handle(Some(SIGRTMIN() + 0))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002355 .context("failed to set thread id for vcpu")?;
Zach Reizner2c770e62020-09-30 16:49:59 -07002356
2357 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002358}
2359
/// Executes one GDB debug request `d` against `vcpu` and sends the outcome to `reply_tube`.
///
/// Every request produces exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`:
/// register and memory reads carry the data that was read, all other requests reply with
/// `CommandComplete`. A failed memory read is reported as an empty memory region rather
/// than as an error.
///
/// # Errors
///
/// Returns an error if the underlying `Arch::debug_*` call fails (except for memory reads)
/// or if the reply cannot be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    let status = match d {
        VcpuDebug::ReadRegs => {
            let regs = Arch::debug_read_registers(vcpu)
                .context("failed to handle a gdb ReadRegs command")?;
            VcpuDebugStatus::RegValues(regs)
        }
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => {
            // A failed read is reported to GDB as an empty region.
            let bytes =
                Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default();
            VcpuDebugStatus::MemoryRegion(bytes)
        }
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .context("failed to send a debug status to GDB thread")
}
2438
Zach Reizner2c770e62020-09-30 16:49:59 -07002439fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002440 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002441 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002442 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002443 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07002444 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002445 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002446 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002447 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002448 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002449 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002450 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002451 has_bios: bool,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002452 mut io_bus: devices::Bus,
2453 mut mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07002454 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002455 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002456 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002457 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002458 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002459 mpsc::Sender<VcpuDebugStatusMessage>,
2460 >,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002461 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002462 host_cpu_topology: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002463) -> Result<JoinHandle<()>>
2464where
Zach Reizner2c770e62020-09-30 16:49:59 -07002465 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002466{
Zach Reizner8fb52112017-12-13 16:04:39 -08002467 thread::Builder::new()
2468 .name(format!("crosvm_vcpu{}", cpu_id))
2469 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002470 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2471 // implementation accomplishes that.
2472 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2473
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002474 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2475 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002476 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002477 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002478 kvm_vcpu_id,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002479 vcpu,
2480 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002481 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002482 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002483 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002484 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002485 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002486 has_bios,
2487 use_hypervisor_signals,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002488 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002489 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002490 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002491
Zach Reizner8fb52112017-12-13 16:04:39 -08002492 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002493
Zach Reizner2c770e62020-09-30 16:49:59 -07002494 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002495 Ok(v) => v,
2496 Err(e) => {
Maciek Swiechc3011222021-11-24 21:01:04 +00002497 error!("failed to start vcpu {}: {:#}", cpu_id, e);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002498 return;
2499 }
2500 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002501
Dylan Reidb0492662019-05-17 14:50:13 -07002502 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002503 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002504 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002505 // Wait until a GDB client attaches
2506 run_mode = VmRunMode::Breakpoint;
2507 }
2508
Dylan Reidb0492662019-05-17 14:50:13 -07002509 let mut interrupted_by_signal = false;
2510
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002511 mmio_bus.set_access_id(cpu_id);
2512 io_bus.set_access_id(cpu_id);
2513
Dylan Reidb0492662019-05-17 14:50:13 -07002514 'vcpu_loop: loop {
2515 // Start by checking for messages to process and the run state of the CPU.
2516 // An extra check here for Running so there isn't a need to call recv unless a
2517 // message is likely to be ready because a signal was sent.
2518 if interrupted_by_signal || run_mode != VmRunMode::Running {
2519 'state_loop: loop {
2520 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002521 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002522 Ok(m) => m,
2523 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2524 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002525 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002526 break 'state_loop;
2527 }
2528 Err(mpsc::TryRecvError::Empty) => {
2529 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002530 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002531 Ok(m) => m,
2532 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002533 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002534 break 'vcpu_loop;
2535 }
2536 }
2537 }
2538 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002539 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002540 break 'vcpu_loop;
2541 }
2542 };
2543
2544 // Collect all pending messages.
2545 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002546 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002547
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002548 for msg in messages {
2549 match msg {
2550 VcpuControl::RunState(new_mode) => {
2551 run_mode = new_mode;
2552 match run_mode {
2553 VmRunMode::Running => break 'state_loop,
2554 VmRunMode::Suspending => {
2555 // On KVM implementations that use a paravirtualized
2556 // clock (e.g. x86), a flag must be set to indicate to
2557 // the guest kernel that a vCPU was suspended. The guest
2558 // kernel will use this flag to prevent the soft lockup
2559 // detection from triggering when this vCPU resumes,
2560 // which could happen days later in realtime.
2561 if requires_pvclock_ctrl {
2562 if let Err(e) = vcpu.pvclock_ctrl() {
2563 error!(
2564 "failed to tell hypervisor vcpu {} is suspending: {}",
2565 cpu_id, e
2566 );
2567 }
2568 }
2569 }
2570 VmRunMode::Breakpoint => {}
2571 VmRunMode::Exiting => break 'vcpu_loop,
2572 }
2573 }
2574 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2575 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002576 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002577 Some(ref ch) => {
2578 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07002579 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002580 ) {
2581 error!("Failed to handle gdb message: {}", e);
2582 }
2583 },
2584 None => {
2585 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002586 }
2587 }
2588 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002589 VcpuControl::MakeRT => {
2590 if run_rt && delay_rt {
2591 info!("Making vcpu {} RT\n", cpu_id);
2592 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2593 if let Err(e) = set_rt_prio_limit(
2594 u64::from(DEFAULT_VCPU_RT_LEVEL))
2595 .and_then(|_|
2596 set_rt_round_robin(
2597 i32::from(DEFAULT_VCPU_RT_LEVEL)
2598 ))
2599 {
2600 warn!("Failed to set vcpu to real time: {}", e);
2601 }
2602 }
2603 }
Dylan Reidb0492662019-05-17 14:50:13 -07002604 }
2605 }
2606 }
2607 }
2608
2609 interrupted_by_signal = false;
2610
Steven Richman11dc6712020-09-02 15:39:14 -07002611 // Vcpus may have run a HLT instruction, which puts them into a state other than
2612 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2613 // until either the irqchip receives an interrupt for this vcpu, or until the main
2614 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2615 // implementations HLT instructions do not make it to crosvm, and thus this is a
2616 // no-op that always returns VcpuRunState::Runnable.
2617 match irq_chip.wait_until_runnable(&vcpu) {
2618 Ok(VcpuRunState::Runnable) => {}
2619 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2620 Err(e) => error!(
2621 "error waiting for vcpu {} to become runnable: {}",
2622 cpu_id, e
2623 ),
2624 }
2625
2626 if !interrupted_by_signal {
2627 match vcpu.run(&vcpu_run_handle) {
2628 Ok(VcpuExit::IoIn { port, mut size }) => {
2629 let mut data = [0; 8];
2630 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002631 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002632 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002633 }
Steven Richman11dc6712020-09-02 15:39:14 -07002634 io_bus.read(port as u64, &mut data[..size]);
2635 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002636 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002637 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002638 }
Steven Richman11dc6712020-09-02 15:39:14 -07002639 Ok(VcpuExit::IoOut {
2640 port,
2641 mut size,
2642 data,
2643 }) => {
2644 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002645 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002646 size = data.len();
2647 }
2648 io_bus.write(port as u64, &data[..size]);
2649 }
2650 Ok(VcpuExit::MmioRead { address, size }) => {
2651 let mut data = [0; 8];
2652 mmio_bus.read(address, &mut data[..size]);
2653 // Setting data for mmio can not fail.
2654 let _ = vcpu.set_data(&data[..size]);
2655 }
2656 Ok(VcpuExit::MmioWrite {
2657 address,
2658 size,
2659 data,
2660 }) => {
2661 mmio_bus.write(address, &data[..size]);
2662 }
2663 Ok(VcpuExit::IoapicEoi { vector }) => {
2664 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2665 error!(
2666 "failed to broadcast eoi {} on vcpu {}: {}",
2667 vector, cpu_id, e
2668 );
2669 }
2670 }
2671 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002672 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002673 Ok(VcpuExit::Shutdown) => break,
2674 Ok(VcpuExit::FailEntry {
2675 hardware_entry_failure_reason,
2676 }) => {
2677 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002678 break;
2679 }
Steven Richman11dc6712020-09-02 15:39:14 -07002680 Ok(VcpuExit::SystemEvent(_, _)) => break,
2681 Ok(VcpuExit::Debug { .. }) => {
2682 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2683 {
2684 let msg = VcpuDebugStatusMessage {
2685 cpu: cpu_id as usize,
2686 msg: VcpuDebugStatus::HitBreakPoint,
2687 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002688 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002689 if let Err(e) = ch.send(msg) {
2690 error!("failed to notify breakpoint to GDB thread: {}", e);
2691 break;
2692 }
2693 }
2694 run_mode = VmRunMode::Breakpoint;
2695 }
2696 }
2697 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2698 Err(e) => match e.errno() {
2699 libc::EINTR => interrupted_by_signal = true,
2700 libc::EAGAIN => {}
2701 _ => {
2702 error!("vcpu hit unknown error: {}", e);
2703 break;
2704 }
2705 },
2706 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002707 }
2708
2709 if interrupted_by_signal {
2710 if use_hypervisor_signals {
2711 // Try to clear the signal that we use to kick VCPU if it is pending before
2712 // attempting to handle pause requests.
2713 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2714 error!("failed to clear pending signal: {}", e);
2715 break;
2716 }
2717 } else {
2718 vcpu.set_immediate_exit(false);
2719 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002720 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002721
Steven Richman11dc6712020-09-02 15:39:14 -07002722 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2723 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2724 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002725 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002726 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002727 .context("failed to spawn VCPU thread")
Zach Reizner39aa26b2017-12-12 18:03:23 -08002728}
2729
Zach Reiznera90649a2021-03-31 12:56:08 -07002730fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002731 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002732 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002733 open_file(
2734 initrd_path,
2735 true, /*read_only*/
2736 false, /*O_DIRECT*/
2737 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002738 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002739 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002740 } else {
2741 None
2742 };
2743
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002744 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002745 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002746 open_file(
2747 kernel_path,
2748 true, /*read_only*/
2749 false, /*O_DIRECT*/
2750 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002751 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002752 ),
2753 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002754 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002755 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002756 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002757 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2758 };
2759
Will Deaconc48e7832021-07-30 19:03:06 +01002760 let swiotlb = if let Some(size) = cfg.swiotlb {
2761 Some(
2762 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002763 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002764 )
2765 } else {
2766 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +00002767 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
2768 Some(64 * 1024 * 1024)
2769 }
Will Deaconc48e7832021-07-30 19:03:06 +01002770 ProtectionType::Unprotected => None,
2771 }
2772 };
2773
Zach Reiznera90649a2021-03-31 12:56:08 -07002774 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002775 memory_size: cfg
2776 .memory
2777 .unwrap_or(256)
2778 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002779 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002780 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002781 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002782 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002783 cpu_clusters: cfg.cpu_clusters.clone(),
2784 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002785 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002786 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002787 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002788 android_fstab: cfg
2789 .android_fstab
2790 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002791 .map(|x| {
2792 File::open(x)
2793 .with_context(|| format!("failed to open android fstab file {}", x.display()))
2794 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002795 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002796 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002797 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002798 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002799 acpi_sdts: cfg
2800 .acpi_tables
2801 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002802 .map(|path| {
2803 SDT::from_file(path)
2804 .with_context(|| format!("failed to open ACPI file {}", path.display()))
2805 })
Tomasz Jeznach42644642020-05-20 23:27:59 -07002806 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002807 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002808 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002809 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002810 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002811 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002812 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002813 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002814 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -07002815 })
2816}
2817
/// Outcome reported by [`run_config`] once a VM run has finished successfully.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitState {
    /// NOTE(review): presumably the run ended because a reset was requested — confirm against
    /// the code that produces this value.
    Reset,
    /// NOTE(review): presumably the run ended because the VM was stopped — confirm against the
    /// code that produces this value.
    Stop,
}
2822
2823pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -07002824 let components = setup_vm_components(&cfg)?;
2825
2826 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002827 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
2828 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002829 let mut mem_policy = MemoryPolicy::empty();
2830 if components.hugepages {
2831 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2832 }
Quentin Perret26203802021-12-02 09:48:43 +00002833 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002834 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +00002835 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Daniel Verkamp6b298582021-08-16 15:37:11 -07002836 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002837
2838 enum KvmIrqChip {
2839 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2840 Split(KvmSplitIrqChip),
2841 Kernel(KvmKernelIrqChip),
2842 }
2843
2844 impl KvmIrqChip {
2845 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2846 match self {
2847 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2848 KvmIrqChip::Split(i) => i,
2849 KvmIrqChip::Kernel(i) => i,
2850 }
2851 }
2852 }
2853
2854 let ioapic_host_tube;
2855 let mut irq_chip = if cfg.split_irqchip {
2856 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2857 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2858 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2859 {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002860 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002861 ioapic_host_tube = Some(host_tube);
2862 KvmIrqChip::Split(
2863 KvmSplitIrqChip::new(
2864 vm_clone,
2865 components.vcpu_count,
2866 ioapic_device_tube,
2867 Some(120),
2868 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002869 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002870 )
2871 }
2872 } else {
2873 ioapic_host_tube = None;
2874 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002875 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
2876 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002877 )
2878 };
2879
2880 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2881}
2882
2883fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002884 cfg: Config,
2885 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002886 mut vm: V,
2887 irq_chip: &mut dyn IrqChipArch,
2888 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002889) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07002890where
2891 Vcpu: VcpuArch + 'static,
2892 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002893{
2894 if cfg.sandbox {
2895 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2896 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2897 // access to those files will not be possible.
2898 info!("crosvm entering multiprocess mode");
2899 }
2900
Daniel Verkampf1439d42021-05-21 13:55:10 -07002901 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002902 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002903 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002904
Zach Reiznera90649a2021-03-31 12:56:08 -07002905 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2906 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2907 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002908 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002909
Zach Reiznera60744b2019-02-13 17:33:32 -08002910 let control_server_socket = match &cfg.socket_path {
2911 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002912 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002913 )),
2914 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002915 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002916
Zach Reiznera90649a2021-03-31 12:56:08 -07002917 let mut control_tubes = Vec::new();
2918
2919 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2920 if let Some(port) = cfg.gdb {
2921 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002922 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07002923 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2924 components.gdb = Some((port, gdb_control_tube));
2925 }
2926
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002927 for wl_cfg in &cfg.vhost_user_wl {
2928 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2929 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002930 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002931 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2932 }
2933
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002934 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2935 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002936 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002937 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -07002938 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002939 device_tube,
2940 ));
2941 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2942 }
2943
Daniel Verkamp6b298582021-08-16 15:37:11 -07002944 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002945 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002946 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002947 let (balloon_host_tube, balloon_device_tube) = Tube::pair().context("failed to create tube")?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002948 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2949 balloon_host_tube
2950 .set_recv_timeout(Some(Duration::from_millis(100)))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002951 .context("failed to create tube")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002952
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002953 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002954 let mut disk_device_tubes = Vec::new();
2955 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002956 let disk_count = cfg.disks.len();
2957 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002958 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002959 disk_host_tubes.push(disk_host_tub);
2960 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002961 }
2962
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002963 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002964 let pmem_count = cfg.pmem_devices.len();
2965 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002966 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002967 pmem_device_tubes.push(pmem_device_tube);
2968 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002969 }
2970
Daniel Verkamp6b298582021-08-16 15:37:11 -07002971 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002972 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002973
Zach Reiznerdc748482021-04-14 13:59:30 -07002974 if let Some(ioapic_host_tube) = ioapic_host_tube {
2975 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2976 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002977
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002978 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002979 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002980 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002981 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002982 Some(mut jail) => {
2983 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2984 #[cfg(feature = "power-monitor-powerd")]
2985 {
Fergus Dall51200512021-08-19 12:54:26 +10002986 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09002987
2988 // Create a tmpfs in the device's root directory so that we can bind mount files.
2989 jail.mount_with_data(
2990 Path::new("none"),
2991 Path::new("/"),
2992 "tmpfs",
2993 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2994 "size=67108864",
2995 )?;
2996
2997 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2998 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2999 }
3000 Some(jail)
3001 }
3002 None => None,
3003 };
3004 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08003005 } else {
3006 (&cfg.battery_type, None)
3007 };
3008
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003009 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
3010
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003011 let fs_count = cfg
3012 .shared_dirs
3013 .iter()
3014 .filter(|sd| sd.kind == SharedDirKind::FS)
3015 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003016 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003017 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003018 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003019 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
3020 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003021 }
3022
Daniel Verkamp6b298582021-08-16 15:37:11 -07003023 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003024 let reset_evt = Event::new().context("failed to create event")?;
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08003025 let mut sys_allocator = Arch::create_system_allocator(&vm);
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09003026
3027 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
3028 let ramoops_region = match &components.pstore {
3029 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07003030 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003031 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09003032 ),
3033 None => None,
3034 };
3035
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00003036 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
3037
Daniel Verkamp891ea3e2022-01-04 12:35:55 -08003038 let phys_max_addr = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08003039
3040 #[cfg(feature = "gpu")]
3041 // Hold on to the render server jail so it keeps running until we exit run_vm()
3042 let mut _render_server_jail = None;
3043 #[cfg(feature = "gpu")]
3044 let mut render_server_fd = None;
3045 #[cfg(feature = "gpu")]
3046 if let Some(gpu_parameters) = &cfg.gpu_parameters {
3047 if let Some(ref render_server_parameters) = gpu_parameters.render_server {
3048 let (jail, fd) = start_gpu_render_server(&cfg, render_server_parameters)?;
3049 _render_server_jail = Some(ScopedMinijail(jail));
3050 render_server_fd = Some(fd);
3051 }
3052 }
3053
Tomasz Nowickiab86d522021-09-22 05:50:46 +00003054 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07003055 &cfg,
3056 &mut vm,
3057 &mut sys_allocator,
3058 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08003059 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07003060 &mut control_tubes,
3061 wayland_device_tube,
3062 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09003063 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07003064 balloon_device_tube,
3065 &mut disk_device_tubes,
3066 &mut pmem_device_tubes,
3067 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003068 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07003069 usb_provider,
3070 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08003071 #[cfg(feature = "gpu")]
3072 render_server_fd,
Zach Reiznerdc748482021-04-14 13:59:30 -07003073 )?;
3074
Peter Fangc2bba082021-04-19 18:40:24 -07003075 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00003076 for device in devices
3077 .iter_mut()
3078 .filter_map(|(dev, _)| dev.as_pci_device_mut())
3079 {
Peter Fangc2bba082021-04-19 18:40:24 -07003080 let sdts = device
3081 .generate_acpi(components.acpi_sdts)
3082 .or_else(|| {
3083 error!("ACPI table generation error");
3084 None
3085 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003086 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07003087 components.acpi_sdts = sdts;
3088 }
3089
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003090 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
3091 let mut kvm_vcpu_ids = Vec::new();
3092
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08003093 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07003094 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06003095 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07003096 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003097 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07003098 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06003099 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08003100 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08003101 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07003102 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09003103 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00003104 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07003105 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003106 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06003107 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003108 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08003109
Daniel Verkamp1286b482021-11-30 15:14:16 -08003110 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
3111 {
3112 // Create Pcie Root Port
3113 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
3114 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
3115 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
3116 let sec_bus = (1..255)
3117 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
3118 .context("failed to find empty bus for Pci hotplug")?;
3119 let pci_bridge = Box::new(PciBridge::new(
3120 pcie_root_port.clone(),
3121 msi_device_tube,
3122 0,
3123 sec_bus,
3124 ));
3125 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
3126 .context("Failed to configure pci bridge device")?;
3127 linux.hotplug_bus.push(pcie_root_port);
3128 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003129
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08003130 #[cfg(feature = "direct")]
3131 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003132 let direct_io = Arc::new(
3133 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
3134 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08003135 for range in pmio.ranges.iter() {
3136 linux
3137 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09003138 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08003139 .unwrap();
3140 }
3141 };
3142
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003143 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07003144 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08003145 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09003146 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08003147 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003148 );
Xiong Zhang46471a02021-11-12 00:34:42 +08003149
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07003150 for range in mmio.ranges.iter() {
3151 linux
3152 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09003153 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07003154 .unwrap();
3155 }
3156 };
3157
3158 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003159 let mut irqs = Vec::new();
3160
3161 #[cfg(feature = "direct")]
3162 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07003163 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003164 warn!("irq {} already reserved.", irq);
3165 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07003166 let trigger = Event::new().context("failed to create event")?;
3167 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003168 linux
3169 .irq_chip
3170 .register_irq_event(*irq, &trigger, Some(&resample))
3171 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07003172 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
3173 .context("failed to enable interrupt forwarding")?;
3174 direct_irq
3175 .irq_enable(*irq)
3176 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003177 irqs.push(direct_irq);
3178 }
3179
3180 #[cfg(feature = "direct")]
3181 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07003182 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003183 warn!("irq {} already reserved.", irq);
3184 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07003185 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003186 linux
3187 .irq_chip
3188 .register_irq_event(*irq, &trigger, None)
3189 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07003190 let direct_irq = devices::DirectIrq::new(trigger, None)
3191 .context("failed to enable interrupt forwarding")?;
3192 direct_irq
3193 .irq_enable(*irq)
3194 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003195 irqs.push(direct_irq);
3196 }
3197
Daniel Verkamp6b298582021-08-16 15:37:11 -07003198 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003199 run_control(
3200 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07003201 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003202 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08003203 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003204 control_tubes,
3205 balloon_host_tube,
3206 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003207 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003208 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003209 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003210 reset_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003211 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003212 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08003213 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003214 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003215 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07003216}
3217
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003218fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
3219 linux: &RunnableLinuxVm<V, Vcpu>,
3220 host_addr: PciAddress,
3221) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
3222 for hp_bus in linux.hotplug_bus.iter() {
3223 if let Some(number) = hp_bus.lock().is_match(host_addr) {
3224 return Ok((hp_bus.clone(), number));
3225 }
3226 }
3227 Err(anyhow!("Failed to find a suitable hotplug bus"))
3228}
3229
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003230fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3231 linux: &mut RunnableLinuxVm<V, Vcpu>,
3232 sys_allocator: &mut SystemAllocator,
3233 cfg: &Config,
3234 control_tubes: &mut Vec<TaggedControlTube>,
3235 vfio_path: &Path,
3236) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003237 let host_os_str = vfio_path
3238 .file_name()
3239 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3240 let host_str = host_os_str
3241 .to_str()
3242 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3243 let host_addr = PciAddress::from_string(host_str);
3244
3245 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
3246
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003247 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
3248 let (vfio_pci_device, jail) = create_vfio_device(
3249 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003250 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003251 sys_allocator,
3252 control_tubes,
3253 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003254 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003255 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08003256 None,
3257 IommuDevType::NoIommu,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003258 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003259
3260 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003261 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003262
Daniel Verkamp6b298582021-08-16 15:37:11 -07003263 let host_os_str = vfio_path
3264 .file_name()
3265 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3266 let host_str = host_os_str
3267 .to_str()
3268 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003269 let host_addr = PciAddress::from_string(host_str);
3270 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003271 let mut hp_bus = hp_bus.lock();
3272 hp_bus.add_hotplug_device(host_key, pci_address);
3273 hp_bus.hot_plug(pci_address);
3274 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003275}
3276
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003277fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3278 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08003279 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003280 vfio_path: &Path,
3281) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003282 let host_os_str = vfio_path
3283 .file_name()
3284 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3285 let host_str = host_os_str
3286 .to_str()
3287 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003288 let host_addr = PciAddress::from_string(host_str);
3289 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003290 for hp_bus in linux.hotplug_bus.iter() {
3291 let mut hp_bus_lock = hp_bus.lock();
3292 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
3293 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08003294 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003295 return Ok(());
3296 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003297 }
3298
Daniel Verkamp6b298582021-08-16 15:37:11 -07003299 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003300}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003301
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003302fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
3303 linux: &mut RunnableLinuxVm<V, Vcpu>,
3304 sys_allocator: &mut SystemAllocator,
3305 cfg: &Config,
3306 add_tubes: &mut Vec<TaggedControlTube>,
3307 vfio_path: &Path,
3308 add: bool,
3309) -> VmResponse {
3310 let ret = if add {
3311 add_vfio_device(linux, sys_allocator, cfg, add_tubes, vfio_path)
3312 } else {
3313 remove_vfio_device(linux, sys_allocator, vfio_path)
3314 };
3315
3316 match ret {
3317 Ok(()) => VmResponse::Ok,
3318 Err(e) => {
3319 error!("hanlde_vfio_command failure: {}", e);
3320 add_tubes.clear();
3321 VmResponse::Err(base::Error::new(libc::EINVAL))
3322 }
3323 }
3324}
3325
Daniel Verkamp29409802021-02-24 14:46:19 -08003326/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
3327/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07003328/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
3329/// loop.
3330fn kick_all_vcpus(
3331 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07003332 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08003333 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07003334) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003335 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08003336 if let Err(e) = tube.send(message.clone()) {
3337 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07003338 }
3339 let _ = handle.kill(SIGRTMIN() + 0);
3340 }
3341 irq_chip.kick_halted_vcpus();
3342}
3343
Zach Reiznerdc748482021-04-14 13:59:30 -07003344fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3345 mut linux: RunnableLinuxVm<V, Vcpu>,
3346 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003347 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08003348 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003349 mut control_tubes: Vec<TaggedControlTube>,
3350 balloon_host_tube: Tube,
3351 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07003352 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003353 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003354 reset_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07003355 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003356 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08003357 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003358 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003359) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003360 #[derive(PollToken)]
3361 enum Token {
3362 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003363 Reset,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003364 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003365 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003366 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08003367 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003368 VmControl { index: usize },
3369 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003370
Zach Reizner19ad1f32019-12-12 18:58:50 -08003371 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003372 .set_raw_mode()
3373 .expect("failed to set terminal raw mode");
3374
Michael Hoylee392c462020-10-07 03:29:24 -07003375 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07003376 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003377 (&reset_evt, Token::Reset),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003378 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07003379 (&sigchld_fd, Token::ChildSignal),
3380 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07003381 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07003382
Zach Reiznera60744b2019-02-13 17:33:32 -08003383 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07003384 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003385 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003386 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003387 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003388 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07003389 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07003390 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003391 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003392 }
3393
Steven Richmanf32d0b42020-06-20 21:45:32 -07003394 let events = linux
3395 .irq_chip
3396 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07003397 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003398
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003399 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07003400 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003401 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003402 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003403 }
3404
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003405 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07003406 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003407 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07003408 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08003409
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003410 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3411 // Create a channel for GDB thread.
3412 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
3413 let (s, r) = mpsc::channel();
3414 (Some(s), Some(r))
3415 } else {
3416 (None, None)
3417 };
3418
Steven Richmanf32d0b42020-06-20 21:45:32 -07003419 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
3420 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07003421 let use_hypervisor_signals = !linux
3422 .vm
3423 .get_hypervisor()
3424 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07003425 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003426
Zach Reizner304e7312020-09-29 16:00:24 -07003427 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00003428 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003429 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
3430 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07003431 // Enable core scheduling before creating vCPUs so that the cookie will be
3432 // shared by all vCPU threads.
3433 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
3434 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003435 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07003436 if let Err(e) = enable_core_scheduling() {
3437 error!("Failed to enable core scheduling: {}", e);
3438 }
3439 }
Daniel Verkamp94c35272019-09-12 13:31:30 -07003440 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07003441 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07003442 let vcpu_affinity = match linux.vcpu_affinity.clone() {
3443 Some(VcpuAffinity::Global(v)) => v,
3444 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
3445 None => Default::default(),
3446 };
Zach Reizner55a9e502018-10-03 10:22:32 -07003447 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07003448 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003449 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07003450 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003451 linux.vm.try_clone().context("failed to clone vm")?,
3452 linux
3453 .irq_chip
3454 .try_box_clone()
3455 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003456 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09003457 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07003458 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09003459 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09003460 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07003461 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003462 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07003463 (*linux.io_bus).clone(),
3464 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07003465 exit_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003466 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003467 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003468 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003469 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3470 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003471 cfg.per_vm_core_scheduling,
3472 cfg.host_cpu_topology,
Zach Reizner55a9e502018-10-03 10:22:32 -07003473 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003474 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003475 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003476
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003477 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3478 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003479 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003480 let to_vcpu_channels = vcpu_handles
3481 .iter()
3482 .map(|(_handle, channel)| channel.clone())
3483 .collect();
3484 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003485 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003486 to_vcpu_channels,
3487 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3488 );
3489 thread::Builder::new()
3490 .name("gdb".to_owned())
3491 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07003492 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003493 };
3494
Dylan Reid059a1882018-07-23 17:58:09 -07003495 vcpu_thread_barrier.wait();
3496
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003497 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09003498 let mut balloon_stats_id: u64 = 0;
3499
Michael Hoylee392c462020-10-07 03:29:24 -07003500 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003501 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003502 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003503 Ok(v) => v,
3504 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003505 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003506 break;
3507 }
3508 }
3509 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003510
Steven Richmanf32d0b42020-06-20 21:45:32 -07003511 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3512 warn!("can't deliver delayed irqs: {}", e);
3513 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003514
Zach Reiznera60744b2019-02-13 17:33:32 -08003515 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003516 for event in events.iter().filter(|e| e.is_readable) {
3517 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003518 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003519 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003520 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003521 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003522 Token::Reset => {
3523 info!("vcpu requested reset");
3524 exit_state = ExitState::Reset;
3525 break 'wait;
3526 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003527 Token::Suspend => {
3528 info!("VM requested suspend");
3529 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003530 kick_all_vcpus(
3531 &vcpu_handles,
3532 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003533 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003534 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003535 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003536 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003537 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003538 while let Some(siginfo) =
3539 sigchld_fd.read().context("failed to create signalfd")?
3540 {
Zach Reizner3ba00982019-01-23 19:04:43 -08003541 let pid = siginfo.ssi_pid;
3542 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3543 Some(label) => format!("{} (pid {})", label, pid),
3544 None => format!("pid {}", pid),
3545 };
David Tolnayf5032762018-12-03 10:46:45 -08003546 error!(
3547 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003548 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003549 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003550 }
Michael Hoylee392c462020-10-07 03:29:24 -07003551 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003552 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003553 Token::IrqFd { index } => {
3554 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3555 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003556 }
3557 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003558 Token::VmControlServer => {
3559 if let Some(socket_server) = &control_server_socket {
3560 match socket_server.accept() {
3561 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003562 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003563 .add(
3564 &socket,
3565 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003566 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003567 },
3568 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003569 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003570 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003571 }
3572 Err(e) => error!("failed to accept socket: {}", e),
3573 }
3574 }
3575 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003576 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003577 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003578 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003579 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003580 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003581 Ok(request) => {
3582 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003583 let response = match request {
3584 VmRequest::VfioCommand { vfio_path, add } => {
3585 handle_vfio_command(
3586 &mut linux,
3587 &mut sys_allocator,
3588 &cfg,
3589 &mut add_tubes,
3590 &vfio_path,
3591 add,
3592 )
3593 }
3594 _ => request.execute(
3595 &mut run_mode_opt,
3596 &balloon_host_tube,
3597 &mut balloon_stats_id,
3598 disk_host_tubes,
3599 #[cfg(feature = "usb")]
3600 Some(&usb_control_tube),
3601 #[cfg(not(feature = "usb"))]
3602 None,
3603 &mut linux.bat_control,
3604 &vcpu_handles,
3605 ),
3606 };
3607
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003608 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003609 error!("failed to send VmResponse: {}", e);
3610 }
3611 if let Some(run_mode) = run_mode_opt {
3612 info!("control socket changed run mode to {}", run_mode);
3613 match run_mode {
3614 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003615 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003616 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003617 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003618 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07003619 for dev in &linux.resume_notify_devices {
3620 dev.lock().resume_imminent();
3621 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003622 }
Steven Richman11dc6712020-09-02 15:39:14 -07003623 kick_all_vcpus(
3624 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003625 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003626 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003627 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003628 }
3629 }
3630 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003631 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003632 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003633 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003634 vm_control_indices_to_remove.push(index);
3635 } else {
3636 error!("failed to recv VmRequest: {}", e);
3637 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003638 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003639 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003640 TaggedControlTube::VmMemory(tube) => {
3641 match tube.recv::<VmMemoryRequest>() {
3642 Ok(request) => {
3643 let response = request.execute(
3644 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003645 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003646 Arc::clone(&map_request),
3647 &mut gralloc,
3648 );
3649 if let Err(e) = tube.send(&response) {
3650 error!("failed to send VmMemoryControlResponse: {}", e);
3651 }
3652 }
3653 Err(e) => {
3654 if let TubeError::Disconnected = e {
3655 vm_control_indices_to_remove.push(index);
3656 } else {
3657 error!("failed to recv VmMemoryControlRequest: {}", e);
3658 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003659 }
3660 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003661 }
3662 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003663 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003664 let response = {
3665 let irq_chip = &mut linux.irq_chip;
3666 request.execute(
3667 |setup| match setup {
3668 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003669 if let Some(event_index) = irq_chip
3670 .register_irq_event(irq, ev, None)?
3671 {
3672 match wait_ctx.add(
3673 ev,
3674 Token::IrqFd {
3675 index: event_index
3676 },
3677 ) {
3678 Err(e) => {
3679 warn!("failed to add IrqFd to poll context: {}", e);
3680 Err(e)
3681 },
3682 Ok(_) => {
3683 Ok(())
3684 }
3685 }
3686 } else {
3687 Ok(())
3688 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003689 }
3690 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08003691 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003692 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003693 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003694 )
3695 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003696 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003697 error!("failed to send VmIrqResponse: {}", e);
3698 }
3699 }
3700 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003701 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003702 vm_control_indices_to_remove.push(index);
3703 } else {
3704 error!("failed to recv VmIrqRequest: {}", e);
3705 }
3706 }
3707 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003708 TaggedControlTube::VmMsync(tube) => {
3709 match tube.recv::<VmMsyncRequest>() {
3710 Ok(request) => {
3711 let response = request.execute(&mut linux.vm);
3712 if let Err(e) = tube.send(&response) {
3713 error!("failed to send VmMsyncResponse: {}", e);
3714 }
3715 }
3716 Err(e) => {
3717 if let TubeError::Disconnected = e {
3718 vm_control_indices_to_remove.push(index);
3719 } else {
3720 error!("failed to recv VmMsyncRequest: {}", e);
3721 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003722 }
3723 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003724 }
3725 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003726 Ok(request) => {
3727 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003728 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003729 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003730 error!("failed to send VmResponse: {}", e);
3731 }
3732 }
3733 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003734 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003735 vm_control_indices_to_remove.push(index);
3736 } else {
3737 error!("failed to recv VmResponse: {}", e);
3738 }
3739 }
3740 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003741 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003742 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003743 if !add_tubes.is_empty() {
3744 for (idx, socket) in add_tubes.iter().enumerate() {
3745 wait_ctx
3746 .add(
3747 socket.as_ref(),
3748 Token::VmControl {
3749 index: idx + control_tubes.len(),
3750 },
3751 )
3752 .context(
3753 "failed to add hotplug vfio-pci descriptor ot wait context",
3754 )?;
3755 }
3756 control_tubes.append(&mut add_tubes);
3757 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003758 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003759 }
3760 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003761
Vikram Auradkarede68c72021-07-01 14:33:54 -07003762 // It's possible more data is readable and buffered while the socket is hungup,
3763 // so don't delete the tube from the poll context until we're sure all the
3764 // data is read.
3765 // Below case covers a condition where we have received a hungup event and the tube is not
3766 // readable.
3767 // In case of readable tube, once all data is read, any attempt to read more data on hungup
3768 // tube should fail. On such failure, we get Disconnected error and index gets added to
3769 // vm_control_indices_to_remove by the time we reach here.
3770 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3771 if let Token::VmControl { index } = event.token {
3772 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003773 }
3774 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003775
3776 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003777 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003778 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003779 vm_control_indices_to_remove.dedup();
3780 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003781 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3782 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003783 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003784 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003785 // that has already been closed. Because the token associated with that spurious event
3786 // now belongs to a different socket, the control loop will start to interact with
3787 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3788 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003789 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003790 wait_ctx
3791 .delete(socket)
3792 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08003793 }
3794
3795 // This line implicitly drops the socket at `index` when it gets returned by
3796 // `swap_remove`. After this line, the socket at `index` is not the one from
3797 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003798 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003799 control_tubes.swap_remove(index);
3800 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003801 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003802 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003803 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003804 }
3805 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003806 }
3807
Zach Reiznerdc748482021-04-14 13:59:30 -07003808 kick_all_vcpus(
3809 &vcpu_handles,
3810 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003811 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003812 );
Steven Richman11dc6712020-09-02 15:39:14 -07003813 for (handle, _) in vcpu_handles {
3814 if let Err(e) = handle.join() {
3815 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003816 }
3817 }
3818
Daniel Verkamp94c35272019-09-12 13:31:30 -07003819 // Explicitly drop the VM structure here to allow the devices to clean up before the
3820 // control sockets are closed when this function exits.
3821 mem::drop(linux);
3822
Zach Reizner19ad1f32019-12-12 18:58:50 -08003823 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003824 .set_canon_mode()
3825 .expect("failed to restore canonical mode for terminal");
3826
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003827 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08003828}