blob: 4d3b4013565697eccfcbe84b29d8271f89522a81 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Hikaru Nishida584e52c2021-04-27 17:37:08 +09005use std::cmp::Reverse;
Zide Chendfc4b882021-03-10 16:35:37 -08006use std::collections::BTreeMap;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090011use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Christian Blichmann50f95912021-11-05 16:59:39 +010015use std::os::unix::{io::FromRawFd, net::UnixStream, prelude::OpenOptionsExt};
Zach Reizner39aa26b2017-12-12 18:03:23 -080016use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070017use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Zach Reizner39aa26b2017-12-12 18:03:23 -080021use std::thread;
22use std::thread::JoinHandle;
23
Daniel Verkamp6b298582021-08-16 15:37:11 -070024use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
Tomasz Jeznach42644642020-05-20 23:27:59 -070026use acpi_tables::sdt::SDT;
27
Daniel Verkamp6b298582021-08-16 15:37:11 -070028use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090029use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090031use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070032use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090033#[cfg(feature = "audio_cras")]
34use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Woody Chow1b16db12021-04-02 16:59:59 +090035#[cfg(feature = "audio")]
36use devices::virtio::vhost::user::vmm::Snd as VhostUserSnd;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090037use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070038 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
Chirantan Ekbote84091e52021-09-10 18:43:17 +090039 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Vsock as VhostUserVsock,
40 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090041};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070042use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090043#[cfg(feature = "gpu")]
44use devices::virtio::{
45 gpu::{DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
46 vhost::user::vmm::Gpu as VhostUserGpu,
47 EventDevice,
48};
paulhsiace17e6e2020-08-28 18:37:45 +080049#[cfg(feature = "audio")]
50use devices::Ac97Dev;
Will Deaconc48e7832021-07-30 19:03:06 +010051use devices::ProtectionType;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080052use devices::{
Xiong Zhangf82f2dc2021-05-21 16:54:12 +080053 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqChip, IrqEventIndex, KvmKernelIrqChip,
54 PciAddress, PciBridge, PciDevice, PcieRootPort, StubPciDevice, VcpuRunState, VfioContainer,
55 VfioDevice, VfioPciDevice, VfioPlatformDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080056};
Daniel Verkampf1439d42021-05-21 13:55:10 -070057#[cfg(feature = "usb")]
58use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070059use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Xiong Zhangdea7dbb2021-07-26 14:49:03 +080060use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070061use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070062use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080063use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080064use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070065use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080066use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090067use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080068
Keiichi Watanabec5262e92020-10-21 15:57:33 +090069#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
70use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090071use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000072 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
Christian Blichmann50f95912021-11-05 16:59:39 +010073 VhostUserFsOption, VhostUserOption, VhostUserWlOption, VhostVsockDeviceParameter,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090074};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070075use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090076 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070077};
Sonny Raoed517d12018-02-13 22:09:43 -080078
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080079#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070080use {
81 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070082 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070083 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
84};
Zach Reizner55a9e502018-10-03 10:22:32 -070085#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070086use {
Steven Richman11dc6712020-09-02 15:39:14 -070087 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
88 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070089 x86_64::X8664arch as Arch,
90};
Zach Reizner39aa26b2017-12-12 18:03:23 -080091
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080092enum TaggedControlTube {
93 Fs(Tube),
94 Vm(Tube),
95 VmMemory(Tube),
96 VmIrq(Tube),
97 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -070098}
99
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800100impl AsRef<Tube> for TaggedControlTube {
101 fn as_ref(&self) -> &Tube {
102 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700103 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800104 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700105 }
106 }
107}
108
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800109impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700110 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800111 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700112 }
113}
114
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When true, the jail is configured with an empty capability set.
    limit_caps: bool,
    /// When true, seccomp filter failures are logged and the `.policy` file is
    /// used even if a precompiled `.bpf` file exists.
    log_failures: bool,
    /// Path of the seccomp policy; its extension is replaced with `bpf` or
    /// `policy` when the file is loaded.
    seccomp_policy: &'a Path,
    /// Optional UID map string applied to the jail's user namespace.
    uid_map: Option<&'a str>,
    /// Optional GID map string applied to the jail's user namespace.
    gid_map: Option<&'a str>,
}
122
Zach Reizner44863792019-06-26 14:22:08 -0700123fn create_base_minijail(
124 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800125 r_limit: Option<u64>,
126 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700127) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800128 // All child jails run in a new user namespace without any users mapped,
129 // they run as nobody unless otherwise configured.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700130 let mut j = Minijail::new().context("failed to jail device")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800131
132 if let Some(config) = config {
133 j.namespace_pids();
134 j.namespace_user();
135 j.namespace_user_disable_setgroups();
136 if config.limit_caps {
137 // Don't need any capabilities.
138 j.use_caps(0);
139 }
140 if let Some(uid_map) = config.uid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700141 j.uidmap(uid_map).context("error setting UID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800142 }
143 if let Some(gid_map) = config.gid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700144 j.gidmap(gid_map).context("error setting GID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800145 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900146 // Run in a new mount namespace.
147 j.namespace_vfs();
148
Matt Delcoc24ad782020-02-14 13:24:36 -0800149 // Run in an empty network namespace.
150 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900151
152 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800153 j.no_new_privs();
154
155 // By default we'll prioritize using the pre-compiled .bpf over the .policy
156 // file (the .bpf is expected to be compiled using "trap" as the failure
157 // behavior instead of the default "kill" behavior).
158 // Refer to the code comment for the "seccomp-log-failures"
159 // command-line parameter for an explanation about why the |log_failures|
160 // flag forces the use of .policy files (and the build-time alternative to
161 // this run-time flag).
162 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
163 if bpf_policy_file.exists() && !config.log_failures {
164 j.parse_seccomp_program(&bpf_policy_file)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700165 .context("failed to parse precompiled seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800166 } else {
167 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
168 // which will correctly kill the entire device process if a worker
169 // thread commits a seccomp violation.
170 j.set_seccomp_filter_tsync();
171 if config.log_failures {
172 j.log_seccomp_filter_failures();
173 }
174 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700175 .context("failed to parse seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800176 }
177 j.use_seccomp_filter();
178 // Don't do init setup.
179 j.run_as_init();
180 }
181
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900182 // Only pivot_root if we are not re-using the current root directory.
183 if root != Path::new("/") {
184 // It's safe to call `namespace_vfs` multiple times.
185 j.namespace_vfs();
Daniel Verkamp6b298582021-08-16 15:37:11 -0700186 j.enter_pivot_root(root)
187 .context("failed to pivot root device")?;
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900188 }
Matt Delco45caf912019-11-13 08:11:09 -0800189
Matt Delcoc24ad782020-02-14 13:24:36 -0800190 // Most devices don't need to open many fds.
191 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
192 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700193 .context("error setting max open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800194
Zach Reizner39aa26b2017-12-12 18:03:23 -0800195 Ok(j)
196}
197
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800198fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700199 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800200 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
201 // A directory for a jailed device's pivot root.
202 let root_path = Path::new(pivot_root);
203 if !root_path.exists() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700204 bail!("{} doesn't exist, can't jail devices", pivot_root);
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800205 }
206 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800207 let config = SandboxConfig {
208 limit_caps: true,
209 log_failures: cfg.seccomp_log_failures,
210 seccomp_policy: &policy_path,
211 uid_map: None,
212 gid_map: None,
213 };
214 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800215 } else {
216 Ok(None)
217 }
218}
219
Daniel Verkamp6b298582021-08-16 15:37:11 -0700220type DeviceResult<T = VirtioDeviceStub> = Result<T>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800221
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000222fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900223 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700224 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800225 // Lock the disk image to prevent other crosvm instances from using it.
226 let lock_op = if disk.read_only {
227 FlockOperation::LockShared
228 } else {
229 FlockOperation::LockExclusive
230 };
Daniel Verkamp6b298582021-08-16 15:37:11 -0700231 flock(&raw_image, lock_op, true).context("failed to lock disk image")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800232
Junichi Uekawa52437db2021-09-29 17:33:07 +0900233 info!("Trying to attach block device: {}", disk.path.display());
Daniel Verkamp6b298582021-08-16 15:37:11 -0700234 let dev = if disk::async_ok(&raw_image).context("failed to check disk async_ok")? {
235 let async_file = disk::create_async_disk_file(raw_image)
236 .context("failed to create async virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700237 Box::new(
238 virtio::BlockAsync::new(
239 virtio::base_features(cfg.protected_vm),
240 async_file,
241 disk.read_only,
242 disk.sparse,
243 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700244 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800245 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700246 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700247 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700248 ) as Box<dyn VirtioDevice>
249 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700250 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700251 .context("failed to create virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700252 Box::new(
253 virtio::Block::new(
254 virtio::base_features(cfg.protected_vm),
255 disk_file,
256 disk.read_only,
257 disk.sparse,
258 disk.block_size,
259 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800260 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700261 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700262 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700263 ) as Box<dyn VirtioDevice>
264 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800265
266 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700267 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700268 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800269 })
270}
271
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900272fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
273 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700274 .context("failed to set up vhost-user block device")?;
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900275
276 Ok(VirtioDeviceStub {
277 dev: Box::new(dev),
278 // no sandbox here because virtqueue handling is exported to a different process.
279 jail: None,
280 })
281}
282
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900283fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
284 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700285 .context("failed to set up vhost-user console device")?;
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900286
287 Ok(VirtioDeviceStub {
288 dev: Box::new(dev),
289 // no sandbox here because virtqueue handling is exported to a different process.
290 jail: None,
291 })
292}
293
Woody Chow5890b702021-02-12 14:57:02 +0900294fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
295 let dev = VhostUserFs::new(
296 virtio::base_features(cfg.protected_vm),
297 &option.socket,
298 &option.tag,
299 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700300 .context("failed to set up vhost-user fs device")?;
Woody Chow5890b702021-02-12 14:57:02 +0900301
302 Ok(VirtioDeviceStub {
303 dev: Box::new(dev),
304 // no sandbox here because virtqueue handling is exported to a different process.
305 jail: None,
306 })
307}
308
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900309fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
310 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700311 .context("failed to set up vhost-user mac80211_hwsim device")?;
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900312
313 Ok(VirtioDeviceStub {
314 dev: Box::new(dev),
315 // no sandbox here because virtqueue handling is exported to a different process.
316 jail: None,
317 })
318}
319
/// Creates a vhost-user snd device stub that uses the socket in `option.socket`.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(cfg: &Config, option: &VhostUserOption) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let device = VhostUserSnd::new(features, &option.socket)
        .context("failed to set up vhost-user snd device")?;

    // no sandbox here because virtqueue handling is exported to a different process.
    let jail = None;
    Ok(VirtioDeviceStub {
        dev: Box::new(device),
        jail,
    })
}
331
David Tolnay2b089fc2019-03-04 15:33:22 -0800332fn create_rng_device(cfg: &Config) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700333 let dev = virtio::Rng::new(virtio::base_features(cfg.protected_vm))
334 .context("failed to set up rng")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800335
336 Ok(VirtioDeviceStub {
337 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700338 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800339 })
340}
341
/// Creates a virtio sound device stub backed by CRAS.
///
/// When sandboxing is enabled the jail uses the `cras_snd_device` policy, gets a small
/// tmpfs mounted as its root, has `/run/cras` bind-mounted into it, and has the
/// current user added via `add_current_user_to_jail`.
#[cfg(feature = "audio_cras")]
fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
    let dev = virtio::snd::cras_backend::VirtioSndCras::new(
        virtio::base_features(cfg.protected_vm),
        cras_snd,
    )
    .context("failed to create cras sound device")?;

    // `cfg` is already a reference; pass it straight through like the other
    // device constructors do.
    let mut jail = simple_jail(cfg, "cras_snd_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory for cras_snd_device.
        // The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )
        .context("failed to mount tmpfs for cras sound device jail")?;

        let run_cras_path = Path::new("/run/cras");
        jail.mount_bind(run_cras_path, run_cras_path, true)
            .context("failed to bind mount /run/cras into cras sound device jail")?;

        add_current_user_to_jail(jail)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
377
/// Creates a virtio TPM device stub together with its storage directory.
///
/// With sandboxing enabled, storage lives in `/run/vm/tpm.<pid>`: the directory is
/// created, chown'ed to the ids returned by `add_current_user_to_jail`, and
/// bind-mounted into the jail. Without a jail, `/tmp/tpm-simulator` is used.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = tpm_jail.as_mut() {
        // Create a tmpfs in the device's root directory for tpm
        // simulator storage. The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let crosvm_ids = add_current_user_to_jail(jail)?;

        let storage_dir = format!("/run/vm/tpm.{}", process::id());
        let storage_path = PathBuf::from(&storage_dir);
        fs::create_dir_all(&storage_path).with_context(|| {
            format!("failed to create tpm storage dir {}", storage_path.display())
        })?;
        let storage_dir_c = CString::new(storage_dir).expect("no nul bytes");
        chown(&storage_dir_c, crosvm_ids.uid, crosvm_ids.gid)
            .context("failed to chown tpm storage")?;

        jail.mount_bind(&storage_path, &storage_path, true)?;
        storage_path
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
426
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700427fn create_single_touch_device(
428 cfg: &Config,
429 single_touch_spec: &TouchDeviceOption,
430 idx: u32,
431) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800432 let socket = single_touch_spec
433 .get_path()
434 .into_unix_stream()
435 .map_err(|e| {
436 error!("failed configuring virtio single touch: {:?}", e);
437 e
438 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800439
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800440 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700441 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700442 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700443 socket,
444 width,
445 height,
446 virtio::base_features(cfg.protected_vm),
447 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700448 .context("failed to set up input device")?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800449 Ok(VirtioDeviceStub {
450 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700451 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800452 })
453}
454
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700455fn create_multi_touch_device(
456 cfg: &Config,
457 multi_touch_spec: &TouchDeviceOption,
458 idx: u32,
459) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000460 let socket = multi_touch_spec
461 .get_path()
462 .into_unix_stream()
463 .map_err(|e| {
464 error!("failed configuring virtio multi touch: {:?}", e);
465 e
466 })?;
467
468 let (width, height) = multi_touch_spec.get_size();
469 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700470 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000471 socket,
472 width,
473 height,
474 virtio::base_features(cfg.protected_vm),
475 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700476 .context("failed to set up input device")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000477
478 Ok(VirtioDeviceStub {
479 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700480 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000481 })
482}
483
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700484fn create_trackpad_device(
485 cfg: &Config,
486 trackpad_spec: &TouchDeviceOption,
487 idx: u32,
488) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800489 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800490 error!("failed configuring virtio trackpad: {}", e);
491 e
492 })?;
493
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800494 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700495 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700496 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700497 socket,
498 width,
499 height,
500 virtio::base_features(cfg.protected_vm),
501 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700502 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800503
504 Ok(VirtioDeviceStub {
505 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700506 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800507 })
508}
509
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700510fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800511 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800512 error!("failed configuring virtio mouse: {}", e);
513 e
514 })?;
515
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700516 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700517 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800518
519 Ok(VirtioDeviceStub {
520 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700521 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800522 })
523}
524
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700525fn create_keyboard_device<T: IntoUnixStream>(
526 cfg: &Config,
527 keyboard_socket: T,
528 idx: u32,
529) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800530 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800531 error!("failed configuring virtio keyboard: {}", e);
532 e
533 })?;
534
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700535 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700536 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800537
538 Ok(VirtioDeviceStub {
539 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700540 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800541 })
542}
543
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700544fn create_switches_device<T: IntoUnixStream>(
545 cfg: &Config,
546 switches_socket: T,
547 idx: u32,
548) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800549 let socket = switches_socket.into_unix_stream().map_err(|e| {
550 error!("failed configuring virtio switches: {}", e);
551 e
552 })?;
553
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700554 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700555 .context("failed to set up input device")?;
Daniel Norman5e23df72021-03-11 10:11:02 -0800556
557 Ok(VirtioDeviceStub {
558 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700559 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800560 })
561}
562
David Tolnay2b089fc2019-03-04 15:33:22 -0800563fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
564 let dev_file = OpenOptions::new()
565 .read(true)
566 .write(true)
567 .open(dev_path)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700568 .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800569
Noah Goldd4ca29b2020-10-27 12:21:52 -0700570 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700571 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800572
573 Ok(VirtioDeviceStub {
574 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700575 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800576 })
577}
578
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800579fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
580 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700581 .context("failed to create balloon")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800582
583 Ok(VirtioDeviceStub {
584 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700585 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800586 })
587}
588
Michael Hoylea596a072020-11-10 19:32:45 -0800589fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800590 // Safe because we ensure that we get a unique handle to the fd.
591 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800592 Tap::from_raw_descriptor(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700593 validate_raw_descriptor(tap_fd).context("failed to validate tap descriptor")?,
Michael Hoylea596a072020-11-10 19:32:45 -0800594 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700595 .context("failed to create tap device")?
David Tolnay2b089fc2019-03-04 15:33:22 -0800596 };
597
Xiong Zhang773c7072020-03-20 10:39:55 +0800598 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
599 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700600 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800601 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
602 vq_pairs = 1;
603 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100604 let features = virtio::base_features(cfg.protected_vm);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700605 let dev =
606 virtio::Net::from(features, tap, vq_pairs).context("failed to set up virtio networking")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800607
608 Ok(VirtioDeviceStub {
609 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700610 jail: simple_jail(cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800611 })
612}
613
614fn create_net_device(
615 cfg: &Config,
616 host_ip: Ipv4Addr,
617 netmask: Ipv4Addr,
618 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800619) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800620 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
621 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700622 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800623 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
624 vq_pairs = 1;
625 }
626
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100627 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800628 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100629 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100630 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100631 features,
632 host_ip,
633 netmask,
634 mac_address,
Will Deacon81d5adb2020-10-06 18:37:48 +0100635 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700636 .context("failed to set up vhost networking")?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800637 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800638 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100639 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700640 .context("failed to set up virtio networking")?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800641 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800642 };
643
644 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800645 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800646 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800647 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800648 };
649
650 Ok(VirtioDeviceStub {
651 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700652 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800653 })
654}
655
Keiichi Watanabe60686582021-03-12 04:53:51 +0900656fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
657 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700658 .context("failed to set up vhost-user net device")?;
Keiichi Watanabe60686582021-03-12 04:53:51 +0900659
660 Ok(VirtioDeviceStub {
661 dev: Box::new(dev),
662 // no sandbox here because virtqueue handling is exported to a different process.
663 jail: None,
664 })
665}
666
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900667fn create_vhost_user_vsock_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
668 let dev = VhostUserVsock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700669 .context("failed to set up vhost-user vsock device")?;
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900670
671 Ok(VirtioDeviceStub {
672 dev: Box::new(dev),
673 // no sandbox here because virtqueue handling is exported to a different process.
674 jail: None,
675 })
676}
677
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900678fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
679 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
680 // connection.
681 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700682 .context("failed to set up vhost-user wl device")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900683
684 Ok(VirtioDeviceStub {
685 dev: Box::new(dev),
686 // no sandbox here because virtqueue handling is exported to a different process.
687 jail: None,
688 })
689}
690
/// Builds a vhost-user GPU device from `opt.socket` and the given pair of tubes.
///
/// The resulting stub carries no jail.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);

    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .context("failed to set up vhost-user gpu device")?;

    // no sandbox here because virtqueue handling is exported to a different process.
    let stub = VirtioDeviceStub {
        dev: Box::new(gpu),
        jail: None,
    };
    Ok(stub)
}
714
/// Builds the virtio GPU device and, when sandboxing yields a jail, populates that jail with the
/// mounts the GPU stack needs.
///
/// Display backends are tried in the order Wayland (only when `wayland_socket_path` is given),
/// X, then the stub backend.
///
/// # Errors
///
/// Returns an error if a configured wayland socket path has no parent directory, if
/// `cfg.gpu_parameters` is not set, if `exit_evt` cannot be cloned, or if any jail setup step
/// fails.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, path)| path.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;

    if let Some(socket_path) = wayland_socket_path {
        // Wayland takes priority over the other backends when a socket is provided.
        display_backends.insert(
            0,
            virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
        );
    }

    // Report missing GPU parameters as an error instead of panicking.
    let gpu_parameters = cfg
        .gpu_parameters
        .as_ref()
        .context("GPU parameters are not configured")?;

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().context("failed to clone event")?,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        gpu_parameters,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = match simple_jail(cfg, "gpu_device")? {
        Some(mut jail) => {
            // Create a tmpfs in the device's root directory so that we can bind mount the
            // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=67108864",
            )?;

            // Device nodes required for DRM.
            let sys_dev_char_path = Path::new("/sys/dev/char");
            jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
            let sys_devices_path = Path::new("/sys/devices");
            jail.mount_bind(sys_devices_path, sys_devices_path, false)?;

            let drm_dri_path = Path::new("/dev/dri");
            if drm_dri_path.exists() {
                jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
            }

            // Prepare GPU shader disk cache directory.
            if let Some(cache_dir) = gpu_parameters.cache_path.as_ref() {
                if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
                    warn!("shader caching not yet supported on ARM with sandbox enabled");
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
                } else {
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                    env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                    if let Some(cache_size) = gpu_parameters.cache_size.as_ref() {
                        env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                    }
                    let shadercache_path = Path::new(cache_dir);
                    jail.mount_bind(shadercache_path, shadercache_path, true)?;
                }
            }

            // If the ARM specific devices exist on the host, bind mount them in.
            let mali0_path = Path::new("/dev/mali0");
            if mali0_path.exists() {
                jail.mount_bind(mali0_path, mali0_path, true)?;
            }

            let pvr_sync_path = Path::new("/dev/pvr_sync");
            if pvr_sync_path.exists() {
                jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
            }

            // If the udmabuf driver exists on the host, bind mount it in.
            let udmabuf_path = Path::new("/dev/udmabuf");
            if udmabuf_path.exists() {
                jail.mount_bind(udmabuf_path, udmabuf_path, true)?;
            }

            // Libraries that are required when mesa drivers are dynamically loaded.
            let lib_dirs = &[
                "/usr/lib",
                "/usr/lib64",
                "/lib",
                "/lib64",
                "/usr/share/glvnd",
                "/usr/share/vulkan",
            ];
            for dir in lib_dirs {
                let dir_path = Path::new(dir);
                if dir_path.exists() {
                    jail.mount_bind(dir_path, dir_path, false)?;
                }
            }

            // Bind mount the wayland socket's directory into jail's root. This is necessary since
            // each new wayland context must open() the socket. If the wayland socket is ever
            // destroyed and remade in the same host directory, new connections will be possible
            // without restarting the wayland device.
            for dir in &wayland_socket_dirs {
                jail.mount_bind(dir, dir, true)?;
            }

            add_current_user_to_jail(&mut jail)?;

            // pvr driver requires read access to /proc/self/task/*/comm.
            let proc_path = Path::new("/proc");
            jail.mount(
                proc_path,
                proc_path,
                "proc",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
            )?;

            // To enable perfetto tracing, we need to give access to the perfetto service IPC
            // endpoints.
            let perfetto_path = Path::new("/run/perfetto");
            if perfetto_path.exists() {
                jail.mount_bind(perfetto_path, perfetto_path, true)?;
            }

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
874
875fn create_wayland_device(
876 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800877 control_tube: Tube,
878 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800879) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900880 let wayland_socket_dirs = cfg
881 .wayland_socket_paths
882 .iter()
883 .map(|(_name, path)| path.parent())
884 .collect::<Option<Vec<_>>>()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700885 .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800886
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100887 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100888 let dev = virtio::Wl::new(
889 features,
890 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800891 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +0100892 resource_bridge,
893 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700894 .context("failed to create wayland device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800895
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700896 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800897 Some(mut jail) => {
898 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
899 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
900 jail.mount_with_data(
901 Path::new("none"),
902 Path::new("/"),
903 "tmpfs",
904 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
905 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800906 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800907
908 // Bind mount the wayland socket's directory into jail's root. This is necessary since
909 // each new wayland context must open() the socket. If the wayland socket is ever
910 // destroyed and remade in the same host directory, new connections will be possible
911 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900912 for dir in &wayland_socket_dirs {
913 jail.mount_bind(dir, dir, true)?;
914 }
Fergus Dall51200512021-08-19 12:54:26 +1000915 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800916
917 Some(jail)
918 }
919 None => None,
920 };
921
922 Ok(VirtioDeviceStub {
923 dev: Box::new(dev),
924 jail,
925 })
926}
927
/// Builds a virtio video device of the given type and, when sandboxing yields a jail, populates
/// that jail with the device nodes, libraries and sockets listed below.
///
/// Returns an error if the jail cannot be created or any mount fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = jail.as_mut() {
        match typ {
            #[cfg(feature = "video-decoder")]
            devices::virtio::VideoDeviceType::Decoder => add_current_user_to_jail(jail)?,
            #[cfg(feature = "video-encoder")]
            devices::virtio::VideoDeviceType::Encoder => add_current_user_to_jail(jail)?,
        };

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices.
            let sys_dev_char = Path::new("/sys/dev/char");
            jail.mount_bind(sys_dev_char, sys_dev_char, false)?;
            let sys_devices = Path::new("/sys/devices");
            jail.mount_bind(sys_devices, sys_devices, false)?;

            // Required for loading dri libraries loaded by minigbm on AMD devices.
            let lib64 = Path::new("/usr/lib64");
            jail.mount_bind(lib64, lib64, false)?;
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
989
/// Creates a video device of type `typ` and appends it to `devs`.
///
/// Nothing is appended if device creation fails; the error is returned instead.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> Result<()> {
    let video_stub = create_video_device(cfg, typ, video_tube)?;
    devs.push(video_stub);
    Ok(())
}
1000
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001001fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001002 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann50f95912021-11-05 16:59:39 +01001003
1004 let device_file = match cfg
1005 .vhost_vsock_device
1006 .as_ref()
1007 .unwrap_or(&VhostVsockDeviceParameter::default())
1008 {
1009 VhostVsockDeviceParameter::Fd(fd) => {
1010 let fd = validate_raw_descriptor(*fd)
1011 .context("failed to validate fd for virtual socker device")?;
1012 // Safe because the `fd` is actually owned by this process and
1013 // we have a unique handle to it.
1014 unsafe { File::from_raw_fd(fd) }
1015 }
1016 VhostVsockDeviceParameter::Path(path) => OpenOptions::new()
1017 .read(true)
1018 .write(true)
1019 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
1020 .open(path)
1021 .context("failed to open virtual socket device")?,
1022 };
1023
1024 let dev = virtio::vhost::Vsock::new(device_file, features, cid)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001025 .context("failed to set up virtual socket device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001026
1027 Ok(VirtioDeviceStub {
1028 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001029 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001030 })
1031}
1032
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001033fn create_fs_device(
1034 cfg: &Config,
1035 uid_map: &str,
1036 gid_map: &str,
1037 src: &Path,
1038 tag: &str,
1039 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001040 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001041) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001042 let max_open_files =
1043 base::get_max_open_files().context("failed to get max number of open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001044 let j = if cfg.sandbox {
1045 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1046 let config = SandboxConfig {
1047 limit_caps: false,
1048 uid_map: Some(uid_map),
1049 gid_map: Some(gid_map),
1050 log_failures: cfg.seccomp_log_failures,
1051 seccomp_policy: &seccomp_policy,
1052 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001053 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1054 // We want bind mounts from the parent namespaces to propagate into the fs device's
1055 // namespace.
1056 jail.set_remount_mode(libc::MS_SLAVE);
1057
1058 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001059 } else {
1060 create_base_minijail(src, Some(max_open_files), None)?
1061 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001062
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001063 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001064 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1065 // when num_queues > 1.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001066 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1067 .context("failed to create fs device")?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001068
1069 Ok(VirtioDeviceStub {
1070 dev: Box::new(dev),
1071 jail: Some(j),
1072 })
1073}
1074
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001075fn create_9p_device(
1076 cfg: &Config,
1077 uid_map: &str,
1078 gid_map: &str,
1079 src: &Path,
1080 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001081 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001082) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001083 let max_open_files =
1084 base::get_max_open_files().context("failed to get max number of open files")?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001085 let (jail, root) = if cfg.sandbox {
1086 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1087 let config = SandboxConfig {
1088 limit_caps: false,
1089 uid_map: Some(uid_map),
1090 gid_map: Some(gid_map),
1091 log_failures: cfg.seccomp_log_failures,
1092 seccomp_policy: &seccomp_policy,
1093 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001094
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001095 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1096 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1097 // namespace.
1098 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001099
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001100 // The shared directory becomes the root of the device's file system.
1101 let root = Path::new("/");
1102 (Some(jail), root)
1103 } else {
1104 // There's no mount namespace so we tell the server to treat the source directory as the
1105 // root.
1106 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001107 };
1108
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001109 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001110 p9_cfg.root = root.into();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001111 let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001112
1113 Ok(VirtioDeviceStub {
1114 dev: Box::new(dev),
1115 jail,
1116 })
1117}
1118
Jakub Starona3411ea2019-04-24 10:55:25 -07001119fn create_pmem_device(
1120 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001121 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001122 resources: &mut SystemAllocator,
1123 disk: &DiskOption,
1124 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001125 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001126) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001127 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001128 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001129
1130 let (disk_size, arena_size) = {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001131 let metadata = std::fs::metadata(&disk.path).with_context(|| {
1132 format!("failed to get disk image {} metadata", disk.path.display())
1133 })?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001134 let disk_len = metadata.len();
1135 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1136 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1137 // we just align the size of the file to 2 MiB then access beyond the last page of the
1138 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1139 // padding up to 2 MiB.
1140 let alignment = 2 * 1024 * 1024;
1141 let align_adjust = if disk_len % alignment != 0 {
1142 alignment - (disk_len % alignment)
1143 } else {
1144 0
1145 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001146 (
1147 disk_len,
1148 disk_len
1149 .checked_add(align_adjust)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001150 .ok_or_else(|| anyhow!("pmem device image too big"))?,
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001151 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001152 };
1153
1154 let protection = {
1155 if disk.read_only {
1156 Protection::read()
1157 } else {
1158 Protection::read_write()
1159 }
1160 };
1161
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001162 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001163 // Conversion from u64 to usize may fail on 32bit system.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001164 let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1165 let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001166
Daniel Verkamp6b298582021-08-16 15:37:11 -07001167 let mut arena =
1168 MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001169 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001170 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001171 .context("failed to reserve pmem memory")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001172
1173 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1174 // of the page with zeroes. However, the anonymous mapping added below must start on a
1175 // page boundary, so round up the size before calculating the offset of the anon region.
1176 let disk_size = round_up_to_page_size(disk_size);
1177
1178 if arena_size > disk_size {
1179 // Add an anonymous region with the same protection as the disk mapping if the arena
1180 // size was aligned.
1181 arena
1182 .add_anon_protection(disk_size, arena_size - disk_size, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001183 .context("failed to reserve pmem padding")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001184 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001185 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001186 };
1187
1188 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001189 .mmio_allocator(MmioType::High)
Daniel Verkamp57e4f542021-10-28 09:56:40 -07001190 .reverse_allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001191 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001192 Alloc::PmemDevice(index),
1193 format!("pmem_disk_image_{}", index),
1194 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1195 128 * 1024 * 1024, /* 128 MiB */
1196 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001197 .context("failed to allocate memory for pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001198
Daniel Verkampe1980a92020-02-07 11:00:55 -08001199 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001200 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001201 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001202 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001203 /* read_only = */ disk.read_only,
1204 /* log_dirty_pages = */ false,
1205 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001206 .context("failed to add pmem device memory")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001207
Daniel Verkampe1980a92020-02-07 11:00:55 -08001208 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001209 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001210 fd,
1211 GuestAddress(mapping_address),
1212 slot,
1213 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001214 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001215 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001216 .context("failed to create pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001217
1218 Ok(VirtioDeviceStub {
1219 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001220 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001221 })
1222}
1223
Zide Chendfc4b882021-03-10 16:35:37 -08001224fn create_iommu_device(
1225 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001226 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001227 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1228) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001229 let dev = virtio::Iommu::new(
1230 virtio::base_features(cfg.protected_vm),
1231 endpoints,
1232 phys_max_addr,
1233 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001234 .context("failed to create IOMMU device")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001235
1236 Ok(VirtioDeviceStub {
1237 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001238 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001239 })
1240}
1241
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001242fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001243 let mut keep_rds = Vec::new();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001244 let evt = Event::new().context("failed to create event")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001245 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001246 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001247 .context("failed to create console device")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001248
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001249 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001250 Some(mut jail) => {
1251 // Create a tmpfs in the device's root directory so that we can bind mount the
1252 // log socket directory into it.
1253 // The size=67108864 is size=64*1024*1024 or size=64MB.
1254 jail.mount_with_data(
1255 Path::new("none"),
1256 Path::new("/"),
1257 "tmpfs",
1258 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1259 "size=67108864",
1260 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001261 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001262 let res = param.add_bind_mounts(&mut jail);
1263 if res.is_err() {
1264 error!("failed to add bind mounts for console device");
1265 }
1266 Some(jail)
1267 }
1268 None => None,
1269 };
1270
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001271 Ok(VirtioDeviceStub {
1272 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001273 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001274 })
1275}
1276
/// Builds a virtio sound device from `path`, jailed with the `vios_audio_device` policy.
///
/// Returns an error if the device or its jail cannot be created.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).context("failed to create sound device")?;

    let jail = simple_jail(cfg, "vios_audio_device")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1287
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001288// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001289#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001290fn create_virtio_devices(
1291 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001292 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001293 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001294 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001295 wayland_device_tube: Tube,
1296 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001297 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001298 balloon_device_tube: Tube,
1299 disk_device_tubes: &mut Vec<Tube>,
1300 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001301 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001302 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001303) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001304 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001305
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001306 for (_, param) in cfg
1307 .serial_parameters
1308 .iter()
1309 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1310 {
1311 let dev = create_console_device(cfg, param)?;
1312 devs.push(dev);
1313 }
1314
Zach Reizner8fb52112017-12-13 16:04:39 -08001315 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001316 let disk_device_tube = disk_device_tubes.remove(0);
1317 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001318 }
1319
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001320 for blk in &cfg.vhost_user_blk {
1321 devs.push(create_vhost_user_block_device(cfg, blk)?);
1322 }
1323
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +09001324 for console in &cfg.vhost_user_console {
1325 devs.push(create_vhost_user_console_device(cfg, console)?);
1326 }
1327
Jakub Starona3411ea2019-04-24 10:55:25 -07001328 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001329 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001330 devs.push(create_pmem_device(
1331 cfg,
1332 vm,
1333 resources,
1334 pmem_disk,
1335 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001336 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001337 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001338 }
1339
David Tolnay2b089fc2019-03-04 15:33:22 -08001340 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001341
Woody Chow737ff122021-03-22 17:49:57 +09001342 #[cfg(feature = "audio_cras")]
1343 {
Woody Chow0b2b6062021-09-03 15:40:02 +09001344 if let Some(cras_snd) = &cfg.cras_snd {
1345 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
Woody Chow737ff122021-03-22 17:49:57 +09001346 }
1347 }
1348
David Tolnayde6b29a2018-12-20 11:49:46 -08001349 #[cfg(feature = "tpm")]
1350 {
David Tolnay43f8e212019-02-13 17:28:16 -08001351 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001352 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001353 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001354 }
1355
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001356 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
1357 devs.push(create_single_touch_device(
1358 cfg,
1359 single_touch_spec,
1360 idx as u32,
1361 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001362 }
1363
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001364 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
1365 devs.push(create_multi_touch_device(
1366 cfg,
1367 multi_touch_spec,
1368 idx as u32,
1369 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001370 }
1371
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001372 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
1373 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001374 }
1375
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001376 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
1377 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001378 }
1379
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001380 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
1381 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001382 }
1383
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001384 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
1385 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -08001386 }
1387
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001388 for dev_path in &cfg.virtio_input_evdevs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001389 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001390 }
1391
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001392 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001393
Zach Reizner39aa26b2017-12-12 18:03:23 -08001394 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001395 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001396 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001397 }
1398
David Tolnay2b089fc2019-03-04 15:33:22 -08001399 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1400 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1401 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001402 if !cfg.vhost_user_net.is_empty() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001403 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
Keiichi Watanabe60686582021-03-12 04:53:51 +09001404 }
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001405 devs.push(create_net_device(cfg, host_ip, netmask, mac_address)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001406 }
1407
Keiichi Watanabe60686582021-03-12 04:53:51 +09001408 for net in &cfg.vhost_user_net {
1409 devs.push(create_vhost_user_net_device(cfg, net)?);
1410 }
1411
Chirantan Ekbote84091e52021-09-10 18:43:17 +09001412 for vsock in &cfg.vhost_user_vsock {
1413 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
1414 }
1415
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001416 for opt in &cfg.vhost_user_wl {
1417 devs.push(create_vhost_user_wl_device(cfg, opt)?);
1418 }
1419
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001420 #[cfg(feature = "gpu")]
1421 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
1422 devs.push(create_vhost_user_gpu_device(
1423 cfg,
1424 opt,
1425 host_tube,
1426 device_tube,
1427 )?);
1428 }
1429
David Tolnayfa701712019-02-13 16:42:54 -08001430 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001431 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001432
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001433 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001434 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001435 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001436
1437 #[cfg(feature = "gpu")]
1438 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001439 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001440 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001441 resource_bridges.push(gpu_socket);
1442 wl_resource_bridge = Some(wl_socket);
1443 }
1444 }
1445
1446 devs.push(create_wayland_device(
1447 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001448 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001449 wl_resource_bridge,
1450 )?);
1451 }
David Tolnayfa701712019-02-13 16:42:54 -08001452
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001453 #[cfg(feature = "video-decoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001454 let video_dec_tube = if cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001455 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -07001456 resource_bridges.push(gpu_tube);
1457 Some(video_tube)
1458 } else {
1459 None
1460 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001461
1462 #[cfg(feature = "video-encoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001463 let video_enc_tube = if cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001464 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -07001465 resource_bridges.push(gpu_tube);
1466 Some(video_tube)
1467 } else {
1468 None
1469 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001470
Zach Reizner3a8100a2017-09-13 19:15:43 -07001471 #[cfg(feature = "gpu")]
1472 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001473 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Jason Macnakd659a0d2021-03-15 15:33:01 -07001474 let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
1475 let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
1476 if !gpu_parameters.displays.is_empty() {
1477 gpu_display_w = gpu_parameters.displays[0].width;
1478 gpu_display_h = gpu_parameters.displays[0].height;
1479 }
1480
Zach Reizner65b98f12019-11-22 17:34:58 -08001481 let mut event_devices = Vec::new();
1482 if cfg.display_window_mouse {
1483 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001484 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001485 let (multi_touch_width, multi_touch_height) = cfg
1486 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001487 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001488 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001489 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -07001490 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001491 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001492 // u32::MAX is the least likely to collide with the indices generated above for
1493 // the multi_touch options, which begin at 0.
1494 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001495 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001496 multi_touch_width,
1497 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001498 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001499 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001500 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001501 devs.push(VirtioDeviceStub {
1502 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001503 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001504 });
1505 event_devices.push(EventDevice::touchscreen(event_device_socket));
1506 }
1507 if cfg.display_window_keyboard {
1508 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001509 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001510 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001511 // u32::MAX is the least likely to collide with the indices generated above for
1512 // the multi_touch options, which begin at 0.
1513 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001514 virtio_dev_socket,
1515 virtio::base_features(cfg.protected_vm),
1516 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001517 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001518 devs.push(VirtioDeviceStub {
1519 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001520 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001521 });
1522 event_devices.push(EventDevice::keyboard(event_device_socket));
1523 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001524 devs.push(create_gpu_device(
1525 cfg,
1526 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001527 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001528 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001529 // Use the unnamed socket for GPU display screens.
1530 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001531 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001532 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001533 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001534 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001535 }
1536 }
1537
Daniel Verkampffb59122021-03-18 14:06:15 -07001538 #[cfg(feature = "video-decoder")]
1539 {
1540 if let Some(video_dec_tube) = video_dec_tube {
1541 register_video_device(
1542 &mut devs,
1543 video_dec_tube,
1544 cfg,
1545 devices::virtio::VideoDeviceType::Decoder,
1546 )?;
1547 }
1548 }
1549
1550 #[cfg(feature = "video-encoder")]
1551 {
1552 if let Some(video_enc_tube) = video_enc_tube {
1553 register_video_device(
1554 &mut devs,
1555 video_enc_tube,
1556 cfg,
1557 devices::virtio::VideoDeviceType::Encoder,
1558 )?;
1559 }
1560 }
1561
Zach Reizneraa575662018-08-15 10:46:32 -07001562 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001563 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001564 }
1565
Woody Chow5890b702021-02-12 14:57:02 +09001566 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001567 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +09001568 }
1569
Woody Chow1b16db12021-04-02 16:59:59 +09001570 #[cfg(feature = "audio")]
1571 for vhost_user_snd in &cfg.vhost_user_snd {
1572 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
1573 }
1574
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001575 for shared_dir in &cfg.shared_dirs {
1576 let SharedDir {
1577 src,
1578 tag,
1579 kind,
1580 uid_map,
1581 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001582 fs_cfg,
1583 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001584 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001585
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001586 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001587 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001588 let device_tube = fs_device_tubes.remove(0);
1589 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001590 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001591 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001592 };
1593 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001594 }
1595
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001596 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
1597 devs.push(create_vhost_user_mac80211_hwsim_device(
1598 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001599 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001600 )?);
1601 }
1602
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001603 #[cfg(feature = "audio")]
1604 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001605 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001606 }
1607
David Tolnay2b089fc2019-03-04 15:33:22 -08001608 Ok(devs)
1609}
1610
Xiong Zhang10f15052021-04-08 17:23:33 +08001611fn create_vfio_device(
1612 cfg: &Config,
1613 vm: &impl Vm,
1614 resources: &mut SystemAllocator,
1615 control_tubes: &mut Vec<TaggedControlTube>,
1616 vfio_path: &Path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001617 bus_num: Option<u8>,
Zide Chendfc4b882021-03-10 16:35:37 -08001618 endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1619 iommu_enabled: bool,
Xiong Zhang10f15052021-04-08 17:23:33 +08001620) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Zide Chendfc4b882021-03-10 16:35:37 -08001621 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001622 .context("failed to get vfio container")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001623
1624 // create MSI, MSI-X, and Mem request sockets for each vfio device
Daniel Verkamp6b298582021-08-16 15:37:11 -07001625 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1626 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001627 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1628
Daniel Verkamp6b298582021-08-16 15:37:11 -07001629 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1630 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001631 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1632
Daniel Verkamp6b298582021-08-16 15:37:11 -07001633 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1634 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001635 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1636
Xiong Zhangdea7dbb2021-07-26 14:49:03 +08001637 let vfio_device = VfioDevice::new(vfio_path, vm, vfio_container.clone(), iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001638 .context("failed to create vfio device")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001639 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1640 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001641 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001642 vfio_device_tube_msi,
1643 vfio_device_tube_msix,
1644 vfio_device_tube_mem,
1645 ));
1646 // early reservation for pass-through PCI devices.
Zide Chendfc4b882021-03-10 16:35:37 -08001647 let endpoint_addr = vfio_pci_device.allocate_address(resources);
1648 if endpoint_addr.is_err() {
Xiong Zhang10f15052021-04-08 17:23:33 +08001649 warn!(
1650 "address reservation failed for vfio {}",
1651 vfio_pci_device.debug_label()
1652 );
1653 }
1654
Zide Chendfc4b882021-03-10 16:35:37 -08001655 if iommu_enabled {
1656 endpoints.insert(endpoint_addr.unwrap().to_u32(), vfio_container);
1657 }
1658
Xiong Zhang10f15052021-04-08 17:23:33 +08001659 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1660}
1661
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001662fn create_vfio_platform_device(
1663 cfg: &Config,
1664 vm: &impl Vm,
1665 _resources: &mut SystemAllocator,
1666 control_tubes: &mut Vec<TaggedControlTube>,
1667 vfio_path: &Path,
1668 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1669 iommu_enabled: bool,
1670) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
1671 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001672 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001673
Daniel Verkamp6b298582021-08-16 15:37:11 -07001674 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1675 Tube::pair().context("failed to create tube")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001676 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1677
1678 let vfio_device = VfioDevice::new(vfio_path, vm, vfio_container, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001679 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001680 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1681
1682 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1683}
1684
David Tolnay2b089fc2019-03-04 15:33:22 -08001685fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001686 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001687 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001688 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001689 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001690 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001691 control_tubes: &mut Vec<TaggedControlTube>,
1692 wayland_device_tube: Tube,
1693 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001694 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001695 balloon_device_tube: Tube,
1696 disk_device_tubes: &mut Vec<Tube>,
1697 pmem_device_tubes: &mut Vec<Tube>,
1698 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001699 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001700 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001701) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001702 let stubs = create_virtio_devices(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001703 cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001704 vm,
1705 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001706 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001707 wayland_device_tube,
1708 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001709 vhost_user_gpu_tubes,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001710 balloon_device_tube,
1711 disk_device_tubes,
1712 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001713 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001714 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001715 )?;
1716
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001717 let mut devices = Vec::new();
David Tolnay2b089fc2019-03-04 15:33:22 -08001718
1719 for stub in stubs {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001720 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001721 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001722 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001723 .context("failed to create virtio pci dev")?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001724 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
1725 devices.push((dev, stub.jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001726 }
1727
Andrew Scull1590e6f2020-03-18 18:00:47 +00001728 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001729 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001730 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
Daniel Verkamp6b298582021-08-16 15:37:11 -07001731 .context("failed to create ac97 device")?;
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001732 let jail = simple_jail(cfg, dev.minijail_policy())?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001733 devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001734 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001735
Daniel Verkampf1439d42021-05-21 13:55:10 -07001736 #[cfg(feature = "usb")]
1737 {
1738 // Create xhci controller.
1739 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001740 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
Daniel Verkampf1439d42021-05-21 13:55:10 -07001741 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001742
Zide Chen5deee482021-04-19 11:06:01 -07001743 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001744 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1745 BTreeMap::new();
1746
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001747 for vfio_dev in cfg
1748 .vfio
1749 .iter()
1750 .filter(|dev| dev.get_type() == VfioType::Pci)
1751 {
1752 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001753 let (vfio_pci_device, jail) = create_vfio_device(
1754 cfg,
1755 vm,
1756 resources,
1757 control_tubes,
1758 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001759 None,
Zide Chendfc4b882021-03-10 16:35:37 -08001760 &mut iommu_attached_endpoints,
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001761 vfio_dev.iommu_enabled(),
Zide Chen5deee482021-04-19 11:06:01 -07001762 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001763
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001764 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07001765 }
Zide Chendfc4b882021-03-10 16:35:37 -08001766
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001767 for vfio_dev in cfg
1768 .vfio
1769 .iter()
1770 .filter(|dev| dev.get_type() == VfioType::Platform)
1771 {
1772 let vfio_path = &vfio_dev.vfio_path;
1773 let (vfio_plat_dev, jail) = create_vfio_platform_device(
1774 cfg,
1775 vm,
1776 resources,
1777 control_tubes,
1778 vfio_path.as_path(),
1779 &mut iommu_attached_endpoints,
1780 false, // Virtio IOMMU is not supported yet
1781 )?;
1782
1783 devices.push((Box::new(vfio_plat_dev), jail));
1784 }
1785
Zide Chendfc4b882021-03-10 16:35:37 -08001786 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08001787 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001788
Daniel Verkamp6b298582021-08-16 15:37:11 -07001789 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001790 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07001791 let mut dev =
1792 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001793 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07001794 // early reservation for viommu.
1795 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001796 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07001797 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001798 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08001799 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001800 }
1801
Mattias Nisslerde2c6402021-10-21 12:05:29 +00001802 for params in &cfg.stub_pci_devices {
1803 // Stub devices don't need jailing since they don't do anything.
1804 devices.push((Box::new(StubPciDevice::new(params)), None));
1805 }
1806
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001807 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08001808}
1809
1810#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001811#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001812struct Ids {
1813 uid: uid_t,
1814 gid: gid_t,
1815}
1816
David Tolnay48c48292019-03-01 16:54:25 -08001817// Set the uid/gid for the jailed process and give a basic id map. This is
1818// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10001819fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
1820 let crosvm_uid = geteuid();
1821 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08001822
David Tolnay48c48292019-03-01 16:54:25 -08001823 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001824 .context("error setting UID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08001825 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001826 .context("error setting GID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08001827
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09001828 if crosvm_uid != 0 {
1829 jail.change_uid(crosvm_uid);
1830 }
1831 if crosvm_gid != 0 {
1832 jail.change_gid(crosvm_gid);
1833 }
Fergus Dall51200512021-08-19 12:54:26 +10001834
David Tolnay41a6f842019-03-01 16:18:44 -08001835 Ok(Ids {
1836 uid: crosvm_uid,
1837 gid: crosvm_gid,
1838 })
David Tolnay48c48292019-03-01 16:54:25 -08001839}
1840
Zach Reizner65b98f12019-11-22 17:34:58 -08001841trait IntoUnixStream {
1842 fn into_unix_stream(self) -> Result<UnixStream>;
1843}
1844
1845impl<'a> IntoUnixStream for &'a Path {
1846 fn into_unix_stream(self) -> Result<UnixStream> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001847 if let Some(fd) = safe_descriptor_from_path(self).context("failed to open event device")? {
Andrew Walbranbc55e302021-07-13 17:35:10 +01001848 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08001849 } else {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001850 UnixStream::connect(self).context("failed to open event device")
Zach Reizner65b98f12019-11-22 17:34:58 -08001851 }
1852 }
1853}
1854impl<'a> IntoUnixStream for &'a PathBuf {
1855 fn into_unix_stream(self) -> Result<UnixStream> {
1856 self.as_path().into_unix_stream()
1857 }
1858}
1859
1860impl IntoUnixStream for UnixStream {
1861 fn into_unix_stream(self) -> Result<UnixStream> {
1862 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001863 }
1864}
1865
Steven Richmanf32d0b42020-06-20 21:45:32 -07001866fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1867 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001868 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001869 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001870 // Our signal handler does nothing and is trivially async signal safe.
1871 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001872 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07001873 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001874 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07001875 } else {
1876 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001877 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001878 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001879 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001880 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001881 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07001882 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001883 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001884 Ok(())
1885}
1886
Steven Richmanf32d0b42020-06-20 21:45:32 -07001887// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001888fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001889 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001890 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001891 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001892 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07001893 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001894 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001895 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001896 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001897 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001898 has_bios: bool,
1899 use_hypervisor_signals: bool,
Yusuke Sato31e136a2021-08-18 11:51:38 -07001900 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001901 host_cpu_topology: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001902) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001903where
Zach Reizner2c770e62020-09-30 16:49:59 -07001904 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001905{
Zach Reizner304e7312020-09-29 16:00:24 -07001906 let mut vcpu = match vcpu {
1907 Some(v) => v,
1908 None => {
1909 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1910 // the vcpu thread.
1911 match vm
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001912 .create_vcpu(kvm_vcpu_id)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001913 .context("failed to create vcpu")?
Zach Reizner304e7312020-09-29 16:00:24 -07001914 .downcast::<V>()
1915 {
1916 Ok(v) => *v,
1917 Err(_) => panic!("VM created wrong type of VCPU"),
1918 }
1919 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001920 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001921
Steven Richmanf32d0b42020-06-20 21:45:32 -07001922 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001923 .add_vcpu(cpu_id, &vcpu)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001924 .context("failed to add vcpu to irq chip")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001925
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001926 if !vcpu_affinity.is_empty() {
1927 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1928 error!("Failed to set CPU affinity: {}", e);
1929 }
1930 }
1931
Steven Richmanf32d0b42020-06-20 21:45:32 -07001932 Arch::configure_vcpu(
1933 vm.get_memory(),
1934 vm.get_hypervisor(),
1935 irq_chip,
1936 &mut vcpu,
1937 cpu_id,
1938 vcpu_count,
1939 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001940 no_smt,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001941 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001942 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001943 .context("failed to configure vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001944
Yusuke Sato31e136a2021-08-18 11:51:38 -07001945 if !enable_per_vm_core_scheduling {
1946 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
1947 if let Err(e) = enable_core_scheduling() {
1948 error!("Failed to enable core scheduling: {}", e);
1949 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001950 }
1951
Kansho Nishidaab205af2020-08-13 18:17:50 +09001952 if run_rt {
1953 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1954 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1955 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1956 {
1957 warn!("Failed to set vcpu to real time: {}", e);
1958 }
1959 }
1960
Steven Richmanf32d0b42020-06-20 21:45:32 -07001961 if use_hypervisor_signals {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001962 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001963 v.retain(|&x| x != SIGRTMIN() + 0);
Daniel Verkamp6b298582021-08-16 15:37:11 -07001964 vcpu.set_signal_mask(&v)
1965 .context("failed to set the signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001966 }
1967
Zach Reizner2c770e62020-09-30 16:49:59 -07001968 let vcpu_run_handle = vcpu
1969 .take_run_handle(Some(SIGRTMIN() + 0))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001970 .context("failed to set thread id for vcpu")?;
Zach Reizner2c770e62020-09-30 16:49:59 -07001971
1972 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001973}
1974
// Executes one GDB request `d` against `vcpu` and reports the outcome on `reply_tube`.
//
// Every request produces exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`. A failing
// register access, memory write, single-step or breakpoint request is returned as an error
// without sending a reply; a failing memory read is reported to GDB as an empty memory region.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu)
                .context("failed to handle a gdb ReadRegs command")?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => {
            // A failed read is answered with an empty region rather than an error.
            let bytes = Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default();
            VcpuDebugStatus::MemoryRegion(bytes)
        }
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .context("failed to send a debug status to GDB thread")
}
2053
Zach Reizner2c770e62020-09-30 16:49:59 -07002054fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002055 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002056 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002057 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002058 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07002059 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002060 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002061 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002062 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002063 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002064 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002065 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002066 has_bios: bool,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002067 mut io_bus: devices::Bus,
2068 mut mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07002069 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002070 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002071 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002072 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002073 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002074 mpsc::Sender<VcpuDebugStatusMessage>,
2075 >,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002076 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002077 host_cpu_topology: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002078) -> Result<JoinHandle<()>>
2079where
Zach Reizner2c770e62020-09-30 16:49:59 -07002080 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002081{
Zach Reizner8fb52112017-12-13 16:04:39 -08002082 thread::Builder::new()
2083 .name(format!("crosvm_vcpu{}", cpu_id))
2084 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002085 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2086 // implementation accomplishes that.
2087 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2088
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002089 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2090 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002091 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002092 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002093 kvm_vcpu_id,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002094 vcpu,
2095 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002096 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002097 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002098 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002099 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002100 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002101 has_bios,
2102 use_hypervisor_signals,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002103 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002104 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002105 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002106
Zach Reizner8fb52112017-12-13 16:04:39 -08002107 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002108
Zach Reizner2c770e62020-09-30 16:49:59 -07002109 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002110 Ok(v) => v,
2111 Err(e) => {
2112 error!("failed to start vcpu {}: {}", cpu_id, e);
2113 return;
2114 }
2115 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002116
Dylan Reidb0492662019-05-17 14:50:13 -07002117 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002118 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002119 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002120 // Wait until a GDB client attaches
2121 run_mode = VmRunMode::Breakpoint;
2122 }
2123
Dylan Reidb0492662019-05-17 14:50:13 -07002124 let mut interrupted_by_signal = false;
2125
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002126 mmio_bus.set_access_id(cpu_id);
2127 io_bus.set_access_id(cpu_id);
2128
Dylan Reidb0492662019-05-17 14:50:13 -07002129 'vcpu_loop: loop {
2130 // Start by checking for messages to process and the run state of the CPU.
2131 // An extra check here for Running so there isn't a need to call recv unless a
2132 // message is likely to be ready because a signal was sent.
2133 if interrupted_by_signal || run_mode != VmRunMode::Running {
2134 'state_loop: loop {
2135 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002136 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002137 Ok(m) => m,
2138 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2139 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002140 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002141 break 'state_loop;
2142 }
2143 Err(mpsc::TryRecvError::Empty) => {
2144 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002145 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002146 Ok(m) => m,
2147 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002148 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002149 break 'vcpu_loop;
2150 }
2151 }
2152 }
2153 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002154 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002155 break 'vcpu_loop;
2156 }
2157 };
2158
2159 // Collect all pending messages.
2160 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002161 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002162
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002163 for msg in messages {
2164 match msg {
2165 VcpuControl::RunState(new_mode) => {
2166 run_mode = new_mode;
2167 match run_mode {
2168 VmRunMode::Running => break 'state_loop,
2169 VmRunMode::Suspending => {
2170 // On KVM implementations that use a paravirtualized
2171 // clock (e.g. x86), a flag must be set to indicate to
2172 // the guest kernel that a vCPU was suspended. The guest
2173 // kernel will use this flag to prevent the soft lockup
2174 // detection from triggering when this vCPU resumes,
2175 // which could happen days later in realtime.
2176 if requires_pvclock_ctrl {
2177 if let Err(e) = vcpu.pvclock_ctrl() {
2178 error!(
2179 "failed to tell hypervisor vcpu {} is suspending: {}",
2180 cpu_id, e
2181 );
2182 }
2183 }
2184 }
2185 VmRunMode::Breakpoint => {}
2186 VmRunMode::Exiting => break 'vcpu_loop,
2187 }
2188 }
2189 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2190 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002191 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002192 Some(ref ch) => {
2193 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07002194 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002195 ) {
2196 error!("Failed to handle gdb message: {}", e);
2197 }
2198 },
2199 None => {
2200 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002201 }
2202 }
2203 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002204 VcpuControl::MakeRT => {
2205 if run_rt && delay_rt {
2206 info!("Making vcpu {} RT\n", cpu_id);
2207 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2208 if let Err(e) = set_rt_prio_limit(
2209 u64::from(DEFAULT_VCPU_RT_LEVEL))
2210 .and_then(|_|
2211 set_rt_round_robin(
2212 i32::from(DEFAULT_VCPU_RT_LEVEL)
2213 ))
2214 {
2215 warn!("Failed to set vcpu to real time: {}", e);
2216 }
2217 }
2218 }
Dylan Reidb0492662019-05-17 14:50:13 -07002219 }
2220 }
2221 }
2222 }
2223
2224 interrupted_by_signal = false;
2225
Steven Richman11dc6712020-09-02 15:39:14 -07002226 // Vcpus may have run a HLT instruction, which puts them into a state other than
2227 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2228 // until either the irqchip receives an interrupt for this vcpu, or until the main
2229 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2230 // implementations HLT instructions do not make it to crosvm, and thus this is a
2231 // no-op that always returns VcpuRunState::Runnable.
2232 match irq_chip.wait_until_runnable(&vcpu) {
2233 Ok(VcpuRunState::Runnable) => {}
2234 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2235 Err(e) => error!(
2236 "error waiting for vcpu {} to become runnable: {}",
2237 cpu_id, e
2238 ),
2239 }
2240
2241 if !interrupted_by_signal {
2242 match vcpu.run(&vcpu_run_handle) {
2243 Ok(VcpuExit::IoIn { port, mut size }) => {
2244 let mut data = [0; 8];
2245 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002246 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002247 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002248 }
Steven Richman11dc6712020-09-02 15:39:14 -07002249 io_bus.read(port as u64, &mut data[..size]);
2250 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002251 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002252 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002253 }
Steven Richman11dc6712020-09-02 15:39:14 -07002254 Ok(VcpuExit::IoOut {
2255 port,
2256 mut size,
2257 data,
2258 }) => {
2259 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002260 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002261 size = data.len();
2262 }
2263 io_bus.write(port as u64, &data[..size]);
2264 }
2265 Ok(VcpuExit::MmioRead { address, size }) => {
2266 let mut data = [0; 8];
2267 mmio_bus.read(address, &mut data[..size]);
2268 // Setting data for mmio can not fail.
2269 let _ = vcpu.set_data(&data[..size]);
2270 }
2271 Ok(VcpuExit::MmioWrite {
2272 address,
2273 size,
2274 data,
2275 }) => {
2276 mmio_bus.write(address, &data[..size]);
2277 }
2278 Ok(VcpuExit::IoapicEoi { vector }) => {
2279 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2280 error!(
2281 "failed to broadcast eoi {} on vcpu {}: {}",
2282 vector, cpu_id, e
2283 );
2284 }
2285 }
2286 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002287 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002288 Ok(VcpuExit::Shutdown) => break,
2289 Ok(VcpuExit::FailEntry {
2290 hardware_entry_failure_reason,
2291 }) => {
2292 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002293 break;
2294 }
Steven Richman11dc6712020-09-02 15:39:14 -07002295 Ok(VcpuExit::SystemEvent(_, _)) => break,
2296 Ok(VcpuExit::Debug { .. }) => {
2297 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2298 {
2299 let msg = VcpuDebugStatusMessage {
2300 cpu: cpu_id as usize,
2301 msg: VcpuDebugStatus::HitBreakPoint,
2302 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002303 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002304 if let Err(e) = ch.send(msg) {
2305 error!("failed to notify breakpoint to GDB thread: {}", e);
2306 break;
2307 }
2308 }
2309 run_mode = VmRunMode::Breakpoint;
2310 }
2311 }
2312 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2313 Err(e) => match e.errno() {
2314 libc::EINTR => interrupted_by_signal = true,
2315 libc::EAGAIN => {}
2316 _ => {
2317 error!("vcpu hit unknown error: {}", e);
2318 break;
2319 }
2320 },
2321 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002322 }
2323
2324 if interrupted_by_signal {
2325 if use_hypervisor_signals {
2326 // Try to clear the signal that we use to kick VCPU if it is pending before
2327 // attempting to handle pause requests.
2328 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2329 error!("failed to clear pending signal: {}", e);
2330 break;
2331 }
2332 } else {
2333 vcpu.set_immediate_exit(false);
2334 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002335 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002336
Steven Richman11dc6712020-09-02 15:39:14 -07002337 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2338 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2339 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002340 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002341 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002342 .context("failed to spawn VCPU thread")
Zach Reizner39aa26b2017-12-12 18:03:23 -08002343}
2344
Zach Reiznera90649a2021-03-31 12:56:08 -07002345fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002346 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002347 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002348 open_file(
2349 initrd_path,
2350 true, /*read_only*/
2351 false, /*O_DIRECT*/
2352 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002353 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002354 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002355 } else {
2356 None
2357 };
2358
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002359 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002360 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002361 open_file(
2362 kernel_path,
2363 true, /*read_only*/
2364 false, /*O_DIRECT*/
2365 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002366 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002367 ),
2368 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002369 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002370 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002371 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002372 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2373 };
2374
Will Deaconc48e7832021-07-30 19:03:06 +01002375 let swiotlb = if let Some(size) = cfg.swiotlb {
2376 Some(
2377 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002378 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002379 )
2380 } else {
2381 match cfg.protected_vm {
2382 ProtectionType::Protected => Some(64 * 1024 * 1024),
2383 ProtectionType::Unprotected => None,
2384 }
2385 };
2386
Zach Reiznera90649a2021-03-31 12:56:08 -07002387 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002388 memory_size: cfg
2389 .memory
2390 .unwrap_or(256)
2391 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002392 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002393 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002394 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002395 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002396 cpu_clusters: cfg.cpu_clusters.clone(),
2397 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002398 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002399 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002400 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002401 android_fstab: cfg
2402 .android_fstab
2403 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002404 .map(|x| {
2405 File::open(x)
2406 .with_context(|| format!("failed to open android fstab file {}", x.display()))
2407 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002408 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002409 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002410 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002411 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002412 acpi_sdts: cfg
2413 .acpi_tables
2414 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002415 .map(|path| {
2416 SDT::from_file(path)
2417 .with_context(|| format!("failed to open ACPI file {}", path.display()))
2418 })
Tomasz Jeznach42644642020-05-20 23:27:59 -07002419 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002420 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002421 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002422 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002423 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002424 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002425 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002426 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002427 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -07002428 })
2429}
2430
Zach Reiznerdc748482021-04-14 13:59:30 -07002431pub fn run_config(cfg: Config) -> Result<()> {
2432 let components = setup_vm_components(&cfg)?;
2433
2434 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002435 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
2436 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002437 let mut mem_policy = MemoryPolicy::empty();
2438 if components.hugepages {
2439 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2440 }
Will Deaconb9755462021-11-03 09:52:21 +00002441 if components.protected_vm == ProtectionType::Protected {
2442 mem_policy |= MemoryPolicy::MLOCK_ON_FAULT;
2443 }
2444 guest_mem
2445 .set_memory_policy(mem_policy)
2446 .context("failed to set guest memory policy")?;
Daniel Verkamp6b298582021-08-16 15:37:11 -07002447 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
2448 let vm = KvmVm::new(&kvm, guest_mem).context("failed to create vm")?;
2449 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002450
2451 enum KvmIrqChip {
2452 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2453 Split(KvmSplitIrqChip),
2454 Kernel(KvmKernelIrqChip),
2455 }
2456
2457 impl KvmIrqChip {
2458 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2459 match self {
2460 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2461 KvmIrqChip::Split(i) => i,
2462 KvmIrqChip::Kernel(i) => i,
2463 }
2464 }
2465 }
2466
2467 let ioapic_host_tube;
2468 let mut irq_chip = if cfg.split_irqchip {
2469 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2470 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2471 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2472 {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002473 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002474 ioapic_host_tube = Some(host_tube);
2475 KvmIrqChip::Split(
2476 KvmSplitIrqChip::new(
2477 vm_clone,
2478 components.vcpu_count,
2479 ioapic_device_tube,
2480 Some(120),
2481 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002482 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002483 )
2484 }
2485 } else {
2486 ioapic_host_tube = None;
2487 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002488 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
2489 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002490 )
2491 };
2492
2493 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2494}
2495
2496fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002497 cfg: Config,
2498 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002499 mut vm: V,
2500 irq_chip: &mut dyn IrqChipArch,
2501 ioapic_host_tube: Option<Tube>,
Zach Reiznera90649a2021-03-31 12:56:08 -07002502) -> Result<()>
2503where
2504 Vcpu: VcpuArch + 'static,
2505 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002506{
2507 if cfg.sandbox {
2508 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2509 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2510 // access to those files will not be possible.
2511 info!("crosvm entering multiprocess mode");
2512 }
2513
Daniel Verkampf1439d42021-05-21 13:55:10 -07002514 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002515 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002516 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002517
Zach Reiznera90649a2021-03-31 12:56:08 -07002518 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2519 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2520 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002521 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002522
Zach Reiznera60744b2019-02-13 17:33:32 -08002523 let control_server_socket = match &cfg.socket_path {
2524 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002525 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002526 )),
2527 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002528 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002529
Zach Reiznera90649a2021-03-31 12:56:08 -07002530 let mut control_tubes = Vec::new();
2531
2532 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2533 if let Some(port) = cfg.gdb {
2534 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002535 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07002536 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2537 components.gdb = Some((port, gdb_control_tube));
2538 }
2539
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002540 for wl_cfg in &cfg.vhost_user_wl {
2541 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2542 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002543 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002544 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2545 }
2546
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002547 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2548 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002549 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002550 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -07002551 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002552 device_tube,
2553 ));
2554 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2555 }
2556
Daniel Verkamp6b298582021-08-16 15:37:11 -07002557 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002558 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002559 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002560 let (balloon_host_tube, balloon_device_tube) = Tube::pair().context("failed to create tube")?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002561 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2562 balloon_host_tube
2563 .set_recv_timeout(Some(Duration::from_millis(100)))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002564 .context("failed to create tube")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002565
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002566 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002567 let mut disk_device_tubes = Vec::new();
2568 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002569 let disk_count = cfg.disks.len();
2570 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002571 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002572 disk_host_tubes.push(disk_host_tub);
2573 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002574 }
2575
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002576 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002577 let pmem_count = cfg.pmem_devices.len();
2578 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002579 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002580 pmem_device_tubes.push(pmem_device_tube);
2581 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002582 }
2583
Daniel Verkamp6b298582021-08-16 15:37:11 -07002584 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002585 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002586
Zach Reiznerdc748482021-04-14 13:59:30 -07002587 if let Some(ioapic_host_tube) = ioapic_host_tube {
2588 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2589 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002590
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002591 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002592 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002593 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002594 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002595 Some(mut jail) => {
2596 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2597 #[cfg(feature = "power-monitor-powerd")]
2598 {
Fergus Dall51200512021-08-19 12:54:26 +10002599 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09002600
2601 // Create a tmpfs in the device's root directory so that we can bind mount files.
2602 jail.mount_with_data(
2603 Path::new("none"),
2604 Path::new("/"),
2605 "tmpfs",
2606 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2607 "size=67108864",
2608 )?;
2609
2610 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2611 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2612 }
2613 Some(jail)
2614 }
2615 None => None,
2616 };
2617 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002618 } else {
2619 (&cfg.battery_type, None)
2620 };
2621
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002622 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2623
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002624 let fs_count = cfg
2625 .shared_dirs
2626 .iter()
2627 .filter(|sd| sd.kind == SharedDirKind::FS)
2628 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002629 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002630 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002631 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002632 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2633 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002634 }
2635
Daniel Verkamp6b298582021-08-16 15:37:11 -07002636 let exit_evt = Event::new().context("failed to create event")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002637 let mut sys_allocator = Arch::create_system_allocator(vm.get_memory());
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002638
2639 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
2640 let ramoops_region = match &components.pstore {
2641 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07002642 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002643 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002644 ),
2645 None => None,
2646 };
2647
Zide Chen71435c12021-03-03 15:02:02 -08002648 let phys_max_addr = Arch::get_phys_max_addr();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002649 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07002650 &cfg,
2651 &mut vm,
2652 &mut sys_allocator,
2653 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08002654 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07002655 &mut control_tubes,
2656 wayland_device_tube,
2657 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002658 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07002659 balloon_device_tube,
2660 &mut disk_device_tubes,
2661 &mut pmem_device_tubes,
2662 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002663 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002664 usb_provider,
2665 Arc::clone(&map_request),
2666 )?;
2667
Peter Fangc2bba082021-04-19 18:40:24 -07002668 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002669 for device in devices
2670 .iter_mut()
2671 .filter_map(|(dev, _)| dev.as_pci_device_mut())
2672 {
Peter Fangc2bba082021-04-19 18:40:24 -07002673 let sdts = device
2674 .generate_acpi(components.acpi_sdts)
2675 .or_else(|| {
2676 error!("ACPI table generation error");
2677 None
2678 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002679 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07002680 components.acpi_sdts = sdts;
2681 }
2682
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002683 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
2684 let mut kvm_vcpu_ids = Vec::new();
2685
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002686 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002687 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002688 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002689 &exit_evt,
2690 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002691 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002692 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002693 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002694 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002695 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002696 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07002697 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002698 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06002699 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002700 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08002701
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002702 // Create Pcie Root Port
2703 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
2704 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2705 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
2706 let sec_bus = (1..255)
2707 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
2708 .context("failed to find empty bus for Pci hotplug")?;
2709 let pci_bridge = Box::new(PciBridge::new(
2710 pcie_root_port.clone(),
2711 msi_device_tube,
2712 0,
2713 sec_bus,
2714 ));
2715 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
2716 .context("Failed to configure pci bridge device")?;
2717 linux.hotplug_bus.push(pcie_root_port);
2718
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002719 #[cfg(feature = "direct")]
2720 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002721 let direct_io = Arc::new(
2722 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
2723 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002724 for range in pmio.ranges.iter() {
2725 linux
2726 .io_bus
2727 .insert_sync(direct_io.clone(), range.0, range.1)
2728 .unwrap();
2729 }
2730 };
2731
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002732 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002733 if let Some(mmio) = &cfg.direct_mmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002734 let direct_io = Arc::new(
2735 devices::DirectIo::new(&mmio.path, false).context("failed to open direct io device")?,
2736 );
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002737 for range in mmio.ranges.iter() {
2738 linux
2739 .mmio_bus
2740 .insert_sync(direct_io.clone(), range.0, range.1)
2741 .unwrap();
2742 }
2743 };
2744
2745 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002746 let mut irqs = Vec::new();
2747
2748 #[cfg(feature = "direct")]
2749 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002750 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002751 warn!("irq {} already reserved.", irq);
2752 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002753 let trigger = Event::new().context("failed to create event")?;
2754 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002755 linux
2756 .irq_chip
2757 .register_irq_event(*irq, &trigger, Some(&resample))
2758 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07002759 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
2760 .context("failed to enable interrupt forwarding")?;
2761 direct_irq
2762 .irq_enable(*irq)
2763 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002764 irqs.push(direct_irq);
2765 }
2766
2767 #[cfg(feature = "direct")]
2768 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002769 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002770 warn!("irq {} already reserved.", irq);
2771 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002772 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002773 linux
2774 .irq_chip
2775 .register_irq_event(*irq, &trigger, None)
2776 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07002777 let direct_irq = devices::DirectIrq::new(trigger, None)
2778 .context("failed to enable interrupt forwarding")?;
2779 direct_irq
2780 .irq_enable(*irq)
2781 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002782 irqs.push(direct_irq);
2783 }
2784
Daniel Verkamp6b298582021-08-16 15:37:11 -07002785 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002786 run_control(
2787 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07002788 sys_allocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002789 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002790 control_tubes,
2791 balloon_host_tube,
2792 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002793 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002794 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002795 exit_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002796 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002797 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002798 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08002799 gralloc,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002800 cfg.per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002801 cfg.host_cpu_topology,
2802 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002803 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002804}
2805
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002806fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
2807 linux: &RunnableLinuxVm<V, Vcpu>,
2808 host_addr: PciAddress,
2809) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
2810 for hp_bus in linux.hotplug_bus.iter() {
2811 if let Some(number) = hp_bus.lock().is_match(host_addr) {
2812 return Ok((hp_bus.clone(), number));
2813 }
2814 }
2815 Err(anyhow!("Failed to find a suitable hotplug bus"))
2816}
2817
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002818#[allow(dead_code)]
2819fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2820 linux: &mut RunnableLinuxVm<V, Vcpu>,
2821 sys_allocator: &mut SystemAllocator,
2822 cfg: &Config,
2823 control_tubes: &mut Vec<TaggedControlTube>,
2824 vfio_path: &Path,
2825) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002826 let host_os_str = vfio_path
2827 .file_name()
2828 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2829 let host_str = host_os_str
2830 .to_str()
2831 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2832 let host_addr = PciAddress::from_string(host_str);
2833
2834 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
2835
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002836 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
2837 let (vfio_pci_device, jail) = create_vfio_device(
2838 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002839 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002840 sys_allocator,
2841 control_tubes,
2842 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002843 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002844 &mut endpoints,
2845 false,
2846 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002847
2848 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002849 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002850
Daniel Verkamp6b298582021-08-16 15:37:11 -07002851 let host_os_str = vfio_path
2852 .file_name()
2853 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2854 let host_str = host_os_str
2855 .to_str()
2856 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002857 let host_addr = PciAddress::from_string(host_str);
2858 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002859 let mut hp_bus = hp_bus.lock();
2860 hp_bus.add_hotplug_device(host_key, pci_address);
2861 hp_bus.hot_plug(pci_address);
2862 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002863}
2864
2865#[allow(dead_code)]
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002866fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2867 linux: &RunnableLinuxVm<V, Vcpu>,
2868 vfio_path: &Path,
2869) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002870 let host_os_str = vfio_path
2871 .file_name()
2872 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2873 let host_str = host_os_str
2874 .to_str()
2875 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002876 let host_addr = PciAddress::from_string(host_str);
2877 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002878 for hp_bus in linux.hotplug_bus.iter() {
2879 let mut hp_bus_lock = hp_bus.lock();
2880 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
2881 hp_bus_lock.hot_unplug(pci_addr);
2882 return Ok(());
2883 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002884 }
2885
Daniel Verkamp6b298582021-08-16 15:37:11 -07002886 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002887}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002888
Daniel Verkamp29409802021-02-24 14:46:19 -08002889/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
2890/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07002891/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2892/// loop.
2893fn kick_all_vcpus(
2894 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07002895 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08002896 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07002897) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002898 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08002899 if let Err(e) = tube.send(message.clone()) {
2900 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07002901 }
2902 let _ = handle.kill(SIGRTMIN() + 0);
2903 }
2904 irq_chip.kick_halted_vcpus();
2905}
2906
Zach Reiznerdc748482021-04-14 13:59:30 -07002907fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
2908 mut linux: RunnableLinuxVm<V, Vcpu>,
2909 mut sys_allocator: SystemAllocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002910 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002911 mut control_tubes: Vec<TaggedControlTube>,
2912 balloon_host_tube: Tube,
2913 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07002914 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002915 exit_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07002916 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002917 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002918 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002919 mut gralloc: RutabagaGralloc,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002920 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002921 host_cpu_topology: bool,
2922 kvm_vcpu_ids: Vec<usize>,
Zach Reizner55a9e502018-10-03 10:22:32 -07002923) -> Result<()> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002924 #[derive(PollToken)]
2925 enum Token {
2926 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002927 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002928 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002929 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08002930 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002931 VmControl { index: usize },
2932 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002933
Zach Reizner19ad1f32019-12-12 18:58:50 -08002934 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002935 .set_raw_mode()
2936 .expect("failed to set terminal raw mode");
2937
Michael Hoylee392c462020-10-07 03:29:24 -07002938 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07002939 (&exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002940 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002941 (&sigchld_fd, Token::ChildSignal),
2942 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07002943 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002944
Zach Reiznera60744b2019-02-13 17:33:32 -08002945 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002946 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002947 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002948 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002949 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002950 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002951 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002952 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002953 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002954 }
2955
Steven Richmanf32d0b42020-06-20 21:45:32 -07002956 let events = linux
2957 .irq_chip
2958 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002959 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002960
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002961 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002962 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002963 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002964 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002965 }
2966
Lepton Wu20333e42019-03-14 10:48:03 -07002967 if sandbox {
2968 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002969 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07002970 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002971
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002972 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2973 // Create a channel for GDB thread.
2974 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2975 let (s, r) = mpsc::channel();
2976 (Some(s), Some(r))
2977 } else {
2978 (None, None)
2979 };
2980
Steven Richmanf32d0b42020-06-20 21:45:32 -07002981 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2982 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002983 let use_hypervisor_signals = !linux
2984 .vm
2985 .get_hypervisor()
2986 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002987 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002988
Zach Reizner304e7312020-09-29 16:00:24 -07002989 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002990 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002991 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2992 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07002993 // Enable core scheduling before creating vCPUs so that the cookie will be
2994 // shared by all vCPU threads.
2995 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
2996 // itself for even better performance. Only vCPUs need the feature.
2997 if enable_per_vm_core_scheduling {
2998 if let Err(e) = enable_core_scheduling() {
2999 error!("Failed to enable core scheduling: {}", e);
3000 }
3001 }
Daniel Verkamp94c35272019-09-12 13:31:30 -07003002 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07003003 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07003004 let vcpu_affinity = match linux.vcpu_affinity.clone() {
3005 Some(VcpuAffinity::Global(v)) => v,
3006 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
3007 None => Default::default(),
3008 };
Zach Reizner55a9e502018-10-03 10:22:32 -07003009 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07003010 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003011 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07003012 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003013 linux.vm.try_clone().context("failed to clone vm")?,
3014 linux
3015 .irq_chip
3016 .try_box_clone()
3017 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003018 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09003019 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07003020 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09003021 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09003022 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07003023 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003024 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07003025 (*linux.io_bus).clone(),
3026 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07003027 exit_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003028 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003029 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003030 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003031 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3032 to_gdb_channel.clone(),
Yusuke Sato31e136a2021-08-18 11:51:38 -07003033 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003034 host_cpu_topology,
Zach Reizner55a9e502018-10-03 10:22:32 -07003035 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003036 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003037 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003038
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003039 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3040 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003041 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003042 let to_vcpu_channels = vcpu_handles
3043 .iter()
3044 .map(|(_handle, channel)| channel.clone())
3045 .collect();
3046 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003047 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003048 to_vcpu_channels,
3049 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3050 );
3051 thread::Builder::new()
3052 .name("gdb".to_owned())
3053 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07003054 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003055 };
3056
Dylan Reid059a1882018-07-23 17:58:09 -07003057 vcpu_thread_barrier.wait();
3058
Charles William Dick54045012021-07-27 19:11:53 +09003059 let mut balloon_stats_id: u64 = 0;
3060
Michael Hoylee392c462020-10-07 03:29:24 -07003061 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003062 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003063 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003064 Ok(v) => v,
3065 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003066 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003067 break;
3068 }
3069 }
3070 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003071
Steven Richmanf32d0b42020-06-20 21:45:32 -07003072 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3073 warn!("can't deliver delayed irqs: {}", e);
3074 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003075
Zach Reiznera60744b2019-02-13 17:33:32 -08003076 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003077 for event in events.iter().filter(|e| e.is_readable) {
3078 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003079 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003080 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003081 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003082 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003083 Token::Suspend => {
3084 info!("VM requested suspend");
3085 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003086 kick_all_vcpus(
3087 &vcpu_handles,
3088 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003089 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003090 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003091 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003092 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003093 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003094 while let Some(siginfo) =
3095 sigchld_fd.read().context("failed to create signalfd")?
3096 {
Zach Reizner3ba00982019-01-23 19:04:43 -08003097 let pid = siginfo.ssi_pid;
3098 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3099 Some(label) => format!("{} (pid {})", label, pid),
3100 None => format!("pid {}", pid),
3101 };
David Tolnayf5032762018-12-03 10:46:45 -08003102 error!(
3103 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003104 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003105 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003106 }
Michael Hoylee392c462020-10-07 03:29:24 -07003107 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003108 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003109 Token::IrqFd { index } => {
3110 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3111 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003112 }
3113 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003114 Token::VmControlServer => {
3115 if let Some(socket_server) = &control_server_socket {
3116 match socket_server.accept() {
3117 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003118 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003119 .add(
3120 &socket,
3121 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003122 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003123 },
3124 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003125 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003126 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003127 }
3128 Err(e) => error!("failed to accept socket: {}", e),
3129 }
3130 }
3131 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003132 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003133 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003134 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003135 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003136 Ok(request) => {
3137 let mut run_mode_opt = None;
3138 let response = request.execute(
3139 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003140 &balloon_host_tube,
Charles William Dick54045012021-07-27 19:11:53 +09003141 &mut balloon_stats_id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003142 disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003143 #[cfg(feature = "usb")]
3144 Some(&usb_control_tube),
3145 #[cfg(not(feature = "usb"))]
3146 None,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08003147 &mut linux.bat_control,
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09003148 &vcpu_handles,
Jakub Starond99cd0a2019-04-11 14:09:39 -07003149 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003150 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003151 error!("failed to send VmResponse: {}", e);
3152 }
3153 if let Some(run_mode) = run_mode_opt {
3154 info!("control socket changed run mode to {}", run_mode);
3155 match run_mode {
3156 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003157 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003158 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003159 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003160 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07003161 for dev in &linux.resume_notify_devices {
3162 dev.lock().resume_imminent();
3163 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003164 }
Steven Richman11dc6712020-09-02 15:39:14 -07003165 kick_all_vcpus(
3166 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003167 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003168 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003169 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003170 }
3171 }
3172 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003173 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003174 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003175 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003176 vm_control_indices_to_remove.push(index);
3177 } else {
3178 error!("failed to recv VmRequest: {}", e);
3179 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003180 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003181 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003182 TaggedControlTube::VmMemory(tube) => {
3183 match tube.recv::<VmMemoryRequest>() {
3184 Ok(request) => {
3185 let response = request.execute(
3186 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003187 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003188 Arc::clone(&map_request),
3189 &mut gralloc,
3190 );
3191 if let Err(e) = tube.send(&response) {
3192 error!("failed to send VmMemoryControlResponse: {}", e);
3193 }
3194 }
3195 Err(e) => {
3196 if let TubeError::Disconnected = e {
3197 vm_control_indices_to_remove.push(index);
3198 } else {
3199 error!("failed to recv VmMemoryControlRequest: {}", e);
3200 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003201 }
3202 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003203 }
3204 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003205 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003206 let response = {
3207 let irq_chip = &mut linux.irq_chip;
3208 request.execute(
3209 |setup| match setup {
3210 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003211 if let Some(event_index) = irq_chip
3212 .register_irq_event(irq, ev, None)?
3213 {
3214 match wait_ctx.add(
3215 ev,
3216 Token::IrqFd {
3217 index: event_index
3218 },
3219 ) {
3220 Err(e) => {
3221 warn!("failed to add IrqFd to poll context: {}", e);
3222 Err(e)
3223 },
3224 Ok(_) => {
3225 Ok(())
3226 }
3227 }
3228 } else {
3229 Ok(())
3230 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003231 }
3232 IrqSetup::Route(route) => irq_chip.route_irq(route),
3233 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003234 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003235 )
3236 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003237 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003238 error!("failed to send VmIrqResponse: {}", e);
3239 }
3240 }
3241 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003242 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003243 vm_control_indices_to_remove.push(index);
3244 } else {
3245 error!("failed to recv VmIrqRequest: {}", e);
3246 }
3247 }
3248 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003249 TaggedControlTube::VmMsync(tube) => {
3250 match tube.recv::<VmMsyncRequest>() {
3251 Ok(request) => {
3252 let response = request.execute(&mut linux.vm);
3253 if let Err(e) = tube.send(&response) {
3254 error!("failed to send VmMsyncResponse: {}", e);
3255 }
3256 }
3257 Err(e) => {
3258 if let TubeError::Disconnected = e {
3259 vm_control_indices_to_remove.push(index);
3260 } else {
3261 error!("failed to recv VmMsyncRequest: {}", e);
3262 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003263 }
3264 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003265 }
3266 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003267 Ok(request) => {
3268 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003269 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003270 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003271 error!("failed to send VmResponse: {}", e);
3272 }
3273 }
3274 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003275 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003276 vm_control_indices_to_remove.push(index);
3277 } else {
3278 error!("failed to recv VmResponse: {}", e);
3279 }
3280 }
3281 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003282 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003283 }
3284 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003285 }
3286 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003287
Vikram Auradkarede68c72021-07-01 14:33:54 -07003288 // It's possible more data is readable and buffered while the socket is hungup,
3289 // so don't delete the tube from the poll context until we're sure all the
3290 // data is read.
3291 // Below case covers a condition where we have received a hungup event and the tube is not
3292 // readable.
3293 // In case of readable tube, once all data is read, any attempt to read more data on hungup
3294 // tube should fail. On such failure, we get Disconnected error and index gets added to
3295 // vm_control_indices_to_remove by the time we reach here.
3296 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3297 if let Token::VmControl { index } = event.token {
3298 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003299 }
3300 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003301
3302 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003303 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003304 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003305 vm_control_indices_to_remove.dedup();
3306 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003307 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3308 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003309 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003310 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003311 // that has already been closed. Because the token associated with that spurious event
3312 // now belongs to a different socket, the control loop will start to interact with
3313 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3314 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003315 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003316 wait_ctx
3317 .delete(socket)
3318 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08003319 }
3320
3321 // This line implicitly drops the socket at `index` when it gets returned by
3322 // `swap_remove`. After this line, the socket at `index` is not the one from
3323 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003324 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003325 control_tubes.swap_remove(index);
3326 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003327 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003328 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003329 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003330 }
3331 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003332 }
3333
Zach Reiznerdc748482021-04-14 13:59:30 -07003334 kick_all_vcpus(
3335 &vcpu_handles,
3336 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003337 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003338 );
Steven Richman11dc6712020-09-02 15:39:14 -07003339 for (handle, _) in vcpu_handles {
3340 if let Err(e) = handle.join() {
3341 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003342 }
3343 }
3344
Daniel Verkamp94c35272019-09-12 13:31:30 -07003345 // Explicitly drop the VM structure here to allow the devices to clean up before the
3346 // control sockets are closed when this function exits.
3347 mem::drop(linux);
3348
Zach Reizner19ad1f32019-12-12 18:58:50 -08003349 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003350 .set_canon_mode()
3351 .expect("failed to restore canonical mode for terminal");
3352
3353 Ok(())
3354}