blob: fe8a83073ff8cc3ff16411833d200f1891ce60ce [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Hikaru Nishida584e52c2021-04-27 17:37:08 +09005use std::cmp::Reverse;
Zide Chendfc4b882021-03-10 16:35:37 -08006use std::collections::BTreeMap;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090011use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Christian Blichmann50f95912021-11-05 16:59:39 +010015use std::os::unix::{io::FromRawFd, net::UnixStream, prelude::OpenOptionsExt};
Zach Reizner39aa26b2017-12-12 18:03:23 -080016use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070017use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Zach Reizner39aa26b2017-12-12 18:03:23 -080021use std::thread;
22use std::thread::JoinHandle;
23
Daniel Verkamp6b298582021-08-16 15:37:11 -070024use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
Tomasz Jeznach42644642020-05-20 23:27:59 -070026use acpi_tables::sdt::SDT;
27
Daniel Verkamp6b298582021-08-16 15:37:11 -070028use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090029use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090031use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070032use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090033#[cfg(feature = "audio_cras")]
34use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Woody Chow1b16db12021-04-02 16:59:59 +090035#[cfg(feature = "audio")]
36use devices::virtio::vhost::user::vmm::Snd as VhostUserSnd;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090037use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070038 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
Chirantan Ekbote84091e52021-09-10 18:43:17 +090039 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Vsock as VhostUserVsock,
40 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090041};
Alexandre Courbotb42b3e52021-07-09 23:38:57 +090042#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
43use devices::virtio::VideoBackendType;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070044use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090045#[cfg(feature = "gpu")]
46use devices::virtio::{
Chia-I Wu16fb6592021-11-10 11:45:32 -080047 gpu::{GpuRenderServerParameters, DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
Chirantan Ekbote44292f52021-06-25 18:31:41 +090048 vhost::user::vmm::Gpu as VhostUserGpu,
49 EventDevice,
50};
paulhsiace17e6e2020-08-28 18:37:45 +080051#[cfg(feature = "audio")]
52use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080053use devices::{
Xiong Zhangf82f2dc2021-05-21 16:54:12 +080054 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqChip, IrqEventIndex, KvmKernelIrqChip,
55 PciAddress, PciBridge, PciDevice, PcieRootPort, StubPciDevice, VcpuRunState, VfioContainer,
56 VfioDevice, VfioPciDevice, VfioPlatformDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080057};
Daniel Verkampf1439d42021-05-21 13:55:10 -070058#[cfg(feature = "usb")]
59use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070060use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Andrew Walbran00f1c9f2021-12-10 17:13:08 +000061use hypervisor::{HypervisorCap, ProtectionType, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070062use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070063use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080064use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080065use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070066use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080067use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090068use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080069
Keiichi Watanabec5262e92020-10-21 15:57:33 +090070#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
71use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090072use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000073 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
Christian Blichmann50f95912021-11-05 16:59:39 +010074 VhostUserFsOption, VhostUserOption, VhostUserWlOption, VhostVsockDeviceParameter,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090075};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070076use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090077 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070078};
Sonny Raoed517d12018-02-13 22:09:43 -080079
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080080#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070081use {
82 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070083 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070084 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
85};
Zach Reizner55a9e502018-10-03 10:22:32 -070086#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070087use {
Steven Richman11dc6712020-09-02 15:39:14 -070088 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
89 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070090 x86_64::X8664arch as Arch,
91};
Zach Reizner39aa26b2017-12-12 18:03:23 -080092
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080093enum TaggedControlTube {
94 Fs(Tube),
95 Vm(Tube),
96 VmMemory(Tube),
97 VmIrq(Tube),
98 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -070099}
100
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800101impl AsRef<Tube> for TaggedControlTube {
102 fn as_ref(&self) -> &Tube {
103 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700104 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800105 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700106 }
107 }
108}
109
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800110impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700111 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800112 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700113 }
114}
115
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When set, the jail is configured with an empty capability set.
    limit_caps: bool,
    /// When set, seccomp filter failures are logged and the `.policy` file is used even if a
    /// precompiled `.bpf` file exists.
    log_failures: bool,
    /// Base path of the seccomp policy; its extension is replaced with `bpf` or `policy`.
    seccomp_policy: &'a Path,
    /// Optional UID map handed to the jail.
    uid_map: Option<&'a str>,
    /// Optional GID map handed to the jail.
    gid_map: Option<&'a str>,
}
123
Zach Reizner44863792019-06-26 14:22:08 -0700124fn create_base_minijail(
125 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800126 r_limit: Option<u64>,
127 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700128) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800129 // All child jails run in a new user namespace without any users mapped,
130 // they run as nobody unless otherwise configured.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700131 let mut j = Minijail::new().context("failed to jail device")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800132
133 if let Some(config) = config {
134 j.namespace_pids();
135 j.namespace_user();
136 j.namespace_user_disable_setgroups();
137 if config.limit_caps {
138 // Don't need any capabilities.
139 j.use_caps(0);
140 }
141 if let Some(uid_map) = config.uid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700142 j.uidmap(uid_map).context("error setting UID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800143 }
144 if let Some(gid_map) = config.gid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700145 j.gidmap(gid_map).context("error setting GID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800146 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900147 // Run in a new mount namespace.
148 j.namespace_vfs();
149
Matt Delcoc24ad782020-02-14 13:24:36 -0800150 // Run in an empty network namespace.
151 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900152
153 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800154 j.no_new_privs();
155
156 // By default we'll prioritize using the pre-compiled .bpf over the .policy
157 // file (the .bpf is expected to be compiled using "trap" as the failure
158 // behavior instead of the default "kill" behavior).
159 // Refer to the code comment for the "seccomp-log-failures"
160 // command-line parameter for an explanation about why the |log_failures|
161 // flag forces the use of .policy files (and the build-time alternative to
162 // this run-time flag).
163 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
164 if bpf_policy_file.exists() && !config.log_failures {
165 j.parse_seccomp_program(&bpf_policy_file)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700166 .context("failed to parse precompiled seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800167 } else {
168 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
169 // which will correctly kill the entire device process if a worker
170 // thread commits a seccomp violation.
171 j.set_seccomp_filter_tsync();
172 if config.log_failures {
173 j.log_seccomp_filter_failures();
174 }
175 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700176 .context("failed to parse seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800177 }
178 j.use_seccomp_filter();
179 // Don't do init setup.
180 j.run_as_init();
181 }
182
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900183 // Only pivot_root if we are not re-using the current root directory.
184 if root != Path::new("/") {
185 // It's safe to call `namespace_vfs` multiple times.
186 j.namespace_vfs();
Daniel Verkamp6b298582021-08-16 15:37:11 -0700187 j.enter_pivot_root(root)
188 .context("failed to pivot root device")?;
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900189 }
Matt Delco45caf912019-11-13 08:11:09 -0800190
Matt Delcoc24ad782020-02-14 13:24:36 -0800191 // Most devices don't need to open many fds.
192 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
193 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700194 .context("error setting max open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800195
Zach Reizner39aa26b2017-12-12 18:03:23 -0800196 Ok(j)
197}
198
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800199fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700200 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800201 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
202 // A directory for a jailed device's pivot root.
203 let root_path = Path::new(pivot_root);
204 if !root_path.exists() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700205 bail!("{} doesn't exist, can't jail devices", pivot_root);
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800206 }
207 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800208 let config = SandboxConfig {
209 limit_caps: true,
210 log_failures: cfg.seccomp_log_failures,
211 seccomp_policy: &policy_path,
212 uid_map: None,
213 gid_map: None,
214 };
215 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800216 } else {
217 Ok(None)
218 }
219}
220
Daniel Verkamp6b298582021-08-16 15:37:11 -0700221type DeviceResult<T = VirtioDeviceStub> = Result<T>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800222
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000223fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900224 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700225 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800226 // Lock the disk image to prevent other crosvm instances from using it.
227 let lock_op = if disk.read_only {
228 FlockOperation::LockShared
229 } else {
230 FlockOperation::LockExclusive
231 };
Daniel Verkamp6b298582021-08-16 15:37:11 -0700232 flock(&raw_image, lock_op, true).context("failed to lock disk image")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800233
Junichi Uekawa52437db2021-09-29 17:33:07 +0900234 info!("Trying to attach block device: {}", disk.path.display());
Daniel Verkamp6b298582021-08-16 15:37:11 -0700235 let dev = if disk::async_ok(&raw_image).context("failed to check disk async_ok")? {
236 let async_file = disk::create_async_disk_file(raw_image)
237 .context("failed to create async virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700238 Box::new(
239 virtio::BlockAsync::new(
240 virtio::base_features(cfg.protected_vm),
241 async_file,
242 disk.read_only,
243 disk.sparse,
244 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700245 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800246 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700247 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700248 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700249 ) as Box<dyn VirtioDevice>
250 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700251 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700252 .context("failed to create virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700253 Box::new(
254 virtio::Block::new(
255 virtio::base_features(cfg.protected_vm),
256 disk_file,
257 disk.read_only,
258 disk.sparse,
259 disk.block_size,
260 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800261 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700262 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700263 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700264 ) as Box<dyn VirtioDevice>
265 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800266
267 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700268 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700269 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800270 })
271}
272
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900273fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
274 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700275 .context("failed to set up vhost-user block device")?;
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900276
277 Ok(VirtioDeviceStub {
278 dev: Box::new(dev),
279 // no sandbox here because virtqueue handling is exported to a different process.
280 jail: None,
281 })
282}
283
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900284fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
285 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700286 .context("failed to set up vhost-user console device")?;
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900287
288 Ok(VirtioDeviceStub {
289 dev: Box::new(dev),
290 // no sandbox here because virtqueue handling is exported to a different process.
291 jail: None,
292 })
293}
294
Woody Chow5890b702021-02-12 14:57:02 +0900295fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
296 let dev = VhostUserFs::new(
297 virtio::base_features(cfg.protected_vm),
298 &option.socket,
299 &option.tag,
300 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700301 .context("failed to set up vhost-user fs device")?;
Woody Chow5890b702021-02-12 14:57:02 +0900302
303 Ok(VirtioDeviceStub {
304 dev: Box::new(dev),
305 // no sandbox here because virtqueue handling is exported to a different process.
306 jail: None,
307 })
308}
309
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900310fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
311 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700312 .context("failed to set up vhost-user mac80211_hwsim device")?;
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900313
314 Ok(VirtioDeviceStub {
315 dev: Box::new(dev),
316 // no sandbox here because virtqueue handling is exported to a different process.
317 jail: None,
318 })
319}
320
/// Creates the VMM side of a vhost-user snd device connected to `option.socket`.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(cfg: &Config, option: &VhostUserOption) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let snd = VhostUserSnd::new(features, &option.socket)
        .context("failed to set up vhost-user snd device")?;

    // no sandbox here because virtqueue handling is exported to a different process.
    let stub = VirtioDeviceStub {
        dev: Box::new(snd),
        jail: None,
    };
    Ok(stub)
}
332
David Tolnay2b089fc2019-03-04 15:33:22 -0800333fn create_rng_device(cfg: &Config) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700334 let dev = virtio::Rng::new(virtio::base_features(cfg.protected_vm))
335 .context("failed to set up rng")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800336
337 Ok(VirtioDeviceStub {
338 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700339 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800340 })
341}
342
/// Creates a virtio sound device backed by CRAS.
///
/// When sandboxing is enabled, the jail gets a small tmpfs as its root, `/run/cras` is
/// bind-mounted into it, and the current user is added to the jail.
///
/// Returns an error if the device cannot be created or any jail setup step fails.
#[cfg(feature = "audio_cras")]
fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
    let dev = virtio::snd::cras_backend::VirtioSndCras::new(
        virtio::base_features(cfg.protected_vm),
        cras_snd,
    )
    .context("failed to create cras sound device")?;

    let mut jail = simple_jail(cfg, "cras_snd_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory for cras_snd_device.
        // The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )
        .context("failed to mount tmpfs for cras sound device")?;

        let run_cras_path = Path::new("/run/cras");
        jail.mount_bind(run_cras_path, run_cras_path, true)
            .context("failed to bind mount /run/cras for cras sound device")?;

        add_current_user_to_jail(jail)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
378
/// Creates a virtio TPM device and prepares its storage directory.
///
/// With a jail, the storage is `/run/vm/tpm.<pid>`: the directory is created, chowned to the
/// ids returned by `add_current_user_to_jail`, and bind-mounted into the jail on top of a
/// small tmpfs root. Without a jail, `/tmp/tpm-simulator` is used and nothing is created here.
///
/// Returns an error if the jail cannot be set up or the storage directory cannot be prepared.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match &mut tpm_jail {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )
            .context("failed to mount tmpfs for tpm device")?;

            let crosvm_ids = add_current_user_to_jail(jail)?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage).with_context(|| {
                format!("failed to create tpm storage dir {}", storage.display())
            })?;
            // The path is built from a literal and a numeric pid, so it has no interior NUL.
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .context("failed to chown tpm storage")?;

            jail.mount_bind(&storage, &storage, true)
                .context("failed to bind mount tpm storage")?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let dev = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail: tpm_jail,
    })
}
427
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700428fn create_single_touch_device(
429 cfg: &Config,
430 single_touch_spec: &TouchDeviceOption,
431 idx: u32,
432) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800433 let socket = single_touch_spec
434 .get_path()
435 .into_unix_stream()
436 .map_err(|e| {
437 error!("failed configuring virtio single touch: {:?}", e);
438 e
439 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800440
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800441 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700442 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700443 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700444 socket,
445 width,
446 height,
447 virtio::base_features(cfg.protected_vm),
448 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700449 .context("failed to set up input device")?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800450 Ok(VirtioDeviceStub {
451 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700452 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800453 })
454}
455
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700456fn create_multi_touch_device(
457 cfg: &Config,
458 multi_touch_spec: &TouchDeviceOption,
459 idx: u32,
460) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000461 let socket = multi_touch_spec
462 .get_path()
463 .into_unix_stream()
464 .map_err(|e| {
465 error!("failed configuring virtio multi touch: {:?}", e);
466 e
467 })?;
468
469 let (width, height) = multi_touch_spec.get_size();
470 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700471 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000472 socket,
473 width,
474 height,
475 virtio::base_features(cfg.protected_vm),
476 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700477 .context("failed to set up input device")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000478
479 Ok(VirtioDeviceStub {
480 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700481 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000482 })
483}
484
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700485fn create_trackpad_device(
486 cfg: &Config,
487 trackpad_spec: &TouchDeviceOption,
488 idx: u32,
489) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800490 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000491 error!("failed configuring virtio trackpad: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800492 e
493 })?;
494
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800495 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700496 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700497 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700498 socket,
499 width,
500 height,
501 virtio::base_features(cfg.protected_vm),
502 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700503 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800504
505 Ok(VirtioDeviceStub {
506 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700507 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800508 })
509}
510
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700511fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800512 let socket = mouse_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000513 error!("failed configuring virtio mouse: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800514 e
515 })?;
516
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700517 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700518 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800519
520 Ok(VirtioDeviceStub {
521 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700522 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800523 })
524}
525
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700526fn create_keyboard_device<T: IntoUnixStream>(
527 cfg: &Config,
528 keyboard_socket: T,
529 idx: u32,
530) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800531 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000532 error!("failed configuring virtio keyboard: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800533 e
534 })?;
535
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700536 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700537 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800538
539 Ok(VirtioDeviceStub {
540 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700541 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800542 })
543}
544
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700545fn create_switches_device<T: IntoUnixStream>(
546 cfg: &Config,
547 switches_socket: T,
548 idx: u32,
549) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800550 let socket = switches_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000551 error!("failed configuring virtio switches: {:#}", e);
Daniel Norman5e23df72021-03-11 10:11:02 -0800552 e
553 })?;
554
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700555 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700556 .context("failed to set up input device")?;
Daniel Norman5e23df72021-03-11 10:11:02 -0800557
558 Ok(VirtioDeviceStub {
559 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700560 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800561 })
562}
563
David Tolnay2b089fc2019-03-04 15:33:22 -0800564fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
565 let dev_file = OpenOptions::new()
566 .read(true)
567 .write(true)
568 .open(dev_path)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700569 .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800570
Noah Goldd4ca29b2020-10-27 12:21:52 -0700571 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700572 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800573
574 Ok(VirtioDeviceStub {
575 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700576 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800577 })
578}
579
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800580fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
581 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700582 .context("failed to create balloon")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800583
584 Ok(VirtioDeviceStub {
585 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700586 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800587 })
588}
589
Alexandre Courbot911773a2021-12-10 14:31:10 +0900590/// Generic method for creating a network device. `create_device` is a closure that takes the virtio
591/// features and number of queue pairs as parameters, and is responsible for creating the device
592/// itself.
593fn create_net_device<F, T>(cfg: &Config, policy: &str, create_device: F) -> DeviceResult
594where
595 F: Fn(u64, u16) -> Result<T>,
596 T: VirtioDevice + 'static,
597{
Xiong Zhang773c7072020-03-20 10:39:55 +0800598 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
599 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700600 if vcpu_count < vq_pairs as usize {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900601 warn!("the number of net vq pairs must not exceed the vcpu count, falling back to single queue mode");
Xiong Zhang773c7072020-03-20 10:39:55 +0800602 vq_pairs = 1;
603 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100604 let features = virtio::base_features(cfg.protected_vm);
Alexandre Courbot911773a2021-12-10 14:31:10 +0900605
606 let dev = create_device(features, vq_pairs)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800607
608 Ok(VirtioDeviceStub {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900609 dev: Box::new(dev) as Box<dyn VirtioDevice>,
610 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800611 })
612}
613
Alexandre Courbot911773a2021-12-10 14:31:10 +0900614/// Returns a network device created from a new TAP interface configured with `host_ip`, `netmask`,
615/// and `mac_address`.
616fn create_net_device_from_config(
David Tolnay2b089fc2019-03-04 15:33:22 -0800617 cfg: &Config,
618 host_ip: Ipv4Addr,
619 netmask: Ipv4Addr,
620 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800621) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800622 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800623 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800624 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800625 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800626 };
627
Alexandre Courbot911773a2021-12-10 14:31:10 +0900628 if cfg.vhost_net {
629 create_net_device(cfg, policy, |features, _vq_pairs| {
630 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
631 &cfg.vhost_net_device_path,
632 features,
633 host_ip,
634 netmask,
635 mac_address,
636 )
637 .context("failed to set up vhost networking")
638 })
639 } else {
640 create_net_device(cfg, policy, |features, vq_pairs| {
641 virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
642 .context("failed to create virtio network device")
643 })
644 }
645}
646
647/// Returns a network device from a file descriptor to a configured TAP interface.
648fn create_tap_net_device_from_fd(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
649 create_net_device(cfg, "net_device", |features, vq_pairs| {
650 // Safe because we ensure that we get a unique handle to the fd.
651 let tap = unsafe {
652 Tap::from_raw_descriptor(
653 validate_raw_descriptor(tap_fd).context("failed to validate tap descriptor")?,
654 )
655 .context("failed to create tap device")?
656 };
657
658 virtio::Net::from(features, tap, vq_pairs).context("failed to create tap net device")
David Tolnay2b089fc2019-03-04 15:33:22 -0800659 })
660}
661
Alexandre Courbot993aa7f2021-12-09 14:51:29 +0900662/// Returns a network device created by opening the persistent, configured TAP interface `tap_name`.
663fn create_tap_net_device_from_name(cfg: &Config, tap_name: &[u8]) -> DeviceResult {
664 create_net_device(cfg, "net_device", |features, vq_pairs| {
665 virtio::Net::<Tap>::new_from_name(features, tap_name, vq_pairs)
666 .context("failed to create configured virtio network device")
667 })
668}
669
Keiichi Watanabe60686582021-03-12 04:53:51 +0900670fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
671 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700672 .context("failed to set up vhost-user net device")?;
Keiichi Watanabe60686582021-03-12 04:53:51 +0900673
674 Ok(VirtioDeviceStub {
675 dev: Box::new(dev),
676 // no sandbox here because virtqueue handling is exported to a different process.
677 jail: None,
678 })
679}
680
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900681fn create_vhost_user_vsock_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
682 let dev = VhostUserVsock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700683 .context("failed to set up vhost-user vsock device")?;
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900684
685 Ok(VirtioDeviceStub {
686 dev: Box::new(dev),
687 // no sandbox here because virtqueue handling is exported to a different process.
688 jail: None,
689 })
690}
691
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900692fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
693 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
694 // connection.
695 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700696 .context("failed to set up vhost-user wl device")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900697
698 Ok(VirtioDeviceStub {
699 dev: Box::new(dev),
700 // no sandbox here because virtqueue handling is exported to a different process.
701 jail: None,
702 })
703}
704
/// Creates a vhost-user gpu device from the socket given in `opt`.
///
/// `host_tube` and `device_tube` are handed to the device constructor. The returned stub carries
/// no jail.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let features = virtio::base_features(cfg.protected_vm);
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .context("failed to set up vhost-user gpu device")?;

    // No sandbox here because virtqueue handling is exported to a different process.
    let jail = None;

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail,
    })
}
728
/// Mirror-mount all the directories in `dirs` into `jail` on a best-effort basis.
///
/// Each directory is bind-mounted at the same path inside the jail, with the writable flag set to
/// `false`. Entries that do not exist are skipped; an error is only returned when a bind mount of
/// an existing directory fails.
#[cfg(any(feature = "gpu", feature = "video-decoder", feature = "video-encoder"))]
fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
    jail: &mut Minijail,
    dirs: &[P],
) -> Result<()> {
    // The chain is lazy, so each existence check happens right before its own mount.
    let present_dirs = dirs.iter().map(Path::new).filter(|path| path.exists());
    for present in present_dirs {
        jail.mount_bind(present, present, false)?;
    }

    Ok(())
}
746
/// Builds the jail shared by the GPU device and the GPU render server.
///
/// Starts from `simple_jail(cfg, policy)`. If that yields no jail, `Ok(None)` is returned.
/// Otherwise a tmpfs root is mounted and the host paths needed for graphics are mounted into it;
/// optional paths are only mounted when they exist on the host.
#[cfg(feature = "gpu")]
fn gpu_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
    let mut jail = match simple_jail(cfg, policy)? {
        Some(jail) => jail,
        None => return Ok(None),
    };

    // Create a tmpfs in the device's root directory so that we can bind mount the
    // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
    let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
    jail.mount_with_data(
        Path::new("none"),
        Path::new("/"),
        "tmpfs",
        tmpfs_flags,
        "size=67108864",
    )?;

    // Device nodes required for DRM. These are mounted unconditionally.
    for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
        let sysfs_dir = Path::new(sysfs_dir);
        jail.mount_bind(sysfs_dir, sysfs_dir, false)?;
    }

    let dri_dir = Path::new("/dev/dri");
    if dri_dir.exists() {
        jail.mount_bind(dri_dir, dri_dir, false)?;
    }

    // Optional writable device nodes: the ARM specific devices (mali0, pvr_sync) and the
    // udmabuf driver. Each one is bind mounted only if it exists on the host.
    for dev_node in &["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
        let dev_node = Path::new(dev_node);
        if dev_node.exists() {
            jail.mount_bind(dev_node, dev_node, true)?;
        }
    }

    // Libraries that are required when mesa drivers are dynamically loaded.
    let driver_dirs = [
        "/usr/lib",
        "/usr/lib64",
        "/lib",
        "/lib64",
        "/usr/share/glvnd",
        "/usr/share/vulkan",
    ];
    jail_mount_bind_if_exists(&mut jail, &driver_dirs)?;

    // pvr driver requires read access to /proc/self/task/*/comm.
    let proc_dir = Path::new("/proc");
    let proc_flags =
        (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
    jail.mount(proc_dir, proc_dir, "proc", proc_flags)?;

    // To enable perfetto tracing, we need to give access to the perfetto service IPC
    // endpoints.
    let perfetto_dir = Path::new("/run/perfetto");
    if perfetto_dir.exists() {
        jail.mount_bind(perfetto_dir, perfetto_dir, true)?;
    }

    Ok(Some(jail))
}
823
/// Creates the virtio GPU device and, when sandboxing yields a jail, prepares that jail.
///
/// Display backends are tried in the order Wayland (only when `wayland_socket_path` is given),
/// X (`x_display`), then the stub backend.
///
/// # Errors
///
/// Returns an error if `cfg.gpu_parameters` is unset, if any configured wayland socket path has
/// no parent directory, if `exit_evt` cannot be cloned, or if setting up the jail fails.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    render_server_fd: Option<SafeDescriptor>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    // Report missing GPU parameters as an error instead of panicking on `unwrap()`.
    let gpu_parameters = cfg
        .gpu_parameters
        .as_ref()
        .ok_or_else(|| anyhow!("missing GPU parameters"))?;

    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, path)| path.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;

    // A wayland backend, when available, goes in front of the X and stub backends.
    if let Some(socket_path) = wayland_socket_path {
        display_backends.insert(
            0,
            virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
        );
    }

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().context("failed to clone event")?,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        gpu_parameters,
        render_server_fd,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = match gpu_jail(cfg, "gpu_device")? {
        Some(mut jail) => {
            // Prepare GPU shader disk cache directory.
            if let Some(cache_dir) = gpu_parameters.cache_path.as_ref() {
                if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
                    warn!("shader caching not yet supported on ARM with sandbox enabled");
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
                } else {
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                    env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                    if let Some(cache_size) = gpu_parameters.cache_size.as_ref() {
                        env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                    }
                    let shadercache_path = Path::new(cache_dir);
                    jail.mount_bind(shadercache_path, shadercache_path, true)?;
                }
            }

            // Bind mount the wayland socket's directory into jail's root. This is necessary since
            // each new wayland context must open() the socket. If the wayland socket is ever
            // destroyed and remade in the same host directory, new connections will be possible
            // without restarting the wayland device.
            for dir in &wayland_socket_dirs {
                jail.mount_bind(dir, dir, true)?;
            }

            add_current_user_to_jail(&mut jail)?;

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
915
/// Launches the GPU render server found at `render_server_parameters.path` inside a jail.
///
/// A seqpacket socket pair is created. The server end is passed to the child through the
/// `--socket-fd` argument and kept open across the launch together with stdout and stderr. The
/// client end is returned to the caller.
///
/// # Errors
///
/// Fails if the socket pair or jail cannot be created, if the server path is not valid UTF-8, or
/// if running the command in the jail fails.
#[cfg(feature = "gpu")]
fn start_gpu_render_server(
    cfg: &Config,
    render_server_parameters: &GpuRenderServerParameters,
) -> Result<SafeDescriptor> {
    let (server_socket, client_socket) =
        UnixSeqpacket::pair().context("failed to create render server socket")?;

    let jail = if let Some(mut sandbox) = gpu_jail(cfg, "gpu_render_server")? {
        // TODO(olv) bind mount and enable shader cache

        // bind mount /dev/log for syslog
        let syslog_socket = Path::new("/dev/log");
        if syslog_socket.exists() {
            sandbox.mount_bind(syslog_socket, syslog_socket, true)?;
        }

        // Run as root in the jail to keep capabilities after execve, which is needed for
        // mounting to work. All capabilities will be dropped afterwards.
        add_current_user_as_root_to_jail(&mut sandbox)?;

        sandbox
    } else {
        Minijail::new().context("failed to create jail")?
    };

    let server_fd = server_socket.as_raw_descriptor();
    let inheritable_fds = [server_fd, libc::STDOUT_FILENO, libc::STDERR_FILENO];

    let cmd = &render_server_parameters.path;
    let cmd_str = cmd
        .to_str()
        .ok_or_else(|| anyhow!("invalid render server path"))?;
    let fd_str = server_fd.to_string();
    let args = [cmd_str, "--socket-fd", &fd_str];

    jail.run(cmd, &inheritable_fds, &args)
        .context("failed to start gpu render server")?;

    Ok(SafeDescriptor::from(client_socket))
}
961
David Tolnay2b089fc2019-03-04 15:33:22 -0800962fn create_wayland_device(
963 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800964 control_tube: Tube,
965 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800966) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900967 let wayland_socket_dirs = cfg
968 .wayland_socket_paths
969 .iter()
970 .map(|(_name, path)| path.parent())
971 .collect::<Option<Vec<_>>>()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700972 .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800973
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100974 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100975 let dev = virtio::Wl::new(
976 features,
977 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800978 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +0100979 resource_bridge,
980 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700981 .context("failed to create wayland device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800982
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700983 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800984 Some(mut jail) => {
985 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
986 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
987 jail.mount_with_data(
988 Path::new("none"),
989 Path::new("/"),
990 "tmpfs",
991 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
992 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800993 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800994
995 // Bind mount the wayland socket's directory into jail's root. This is necessary since
996 // each new wayland context must open() the socket. If the wayland socket is ever
997 // destroyed and remade in the same host directory, new connections will be possible
998 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900999 for dir in &wayland_socket_dirs {
1000 jail.mount_bind(dir, dir, true)?;
1001 }
Fergus Dall51200512021-08-19 12:54:26 +10001002 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001003
1004 Some(jail)
1005 }
1006 None => None,
1007 };
1008
1009 Ok(VirtioDeviceStub {
1010 dev: Box::new(dev),
1011 jail,
1012 })
1013}
1014
/// Creates a virtio video device of kind `typ` on top of `backend`.
///
/// When the "video_device" policy yields a jail, the jail gets a tmpfs root plus the bind mounts
/// set up below: DRM render nodes for the libvda backends, sysfs and library directories on x86,
/// and `/dev/urandom` and the D-Bus system socket in all cases.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    backend: VideoBackendType,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let jail = if let Some(mut sandbox) = simple_jail(cfg, "video_device")? {
        match typ {
            #[cfg(feature = "video-decoder")]
            devices::virtio::VideoDeviceType::Decoder => add_current_user_to_jail(&mut sandbox)?,
            #[cfg(feature = "video-encoder")]
            devices::virtio::VideoDeviceType::Encoder => add_current_user_to_jail(&mut sandbox)?,
        };

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        sandbox.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        #[cfg(feature = "libvda")]
        if backend == VideoBackendType::Libvda || backend == VideoBackendType::LibvdaVd {
            // follow the implementation at:
            // https://source.corp.google.com/chromeos_public/src/platform/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=90;bpv=0;cl=c06cc9cccb3cf3c7f9d2aec706c27c34cd6162a0
            const DRM_NUM_NODES: u32 = 63;
            const DRM_RENDER_NODE_START: u32 = 128;
            let render_nodes = (DRM_RENDER_NODE_START..DRM_RENDER_NODE_START + DRM_NUM_NODES)
                .map(|minor| format!("/dev/dri/renderD{}", minor));
            for node in render_nodes {
                let node_path = Path::new(&node);
                // Stop at the first render node that does not exist.
                if !node_path.exists() {
                    break;
                }
                sandbox.mount_bind(node_path, node_path, false)?;
            }
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices.
            for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
                let sysfs_dir = Path::new(sysfs_dir);
                sandbox.mount_bind(sysfs_dir, sysfs_dir, false)?;
            }

            // Required for loading dri libraries loaded by minigbm on AMD devices.
            jail_mount_bind_if_exists(&mut sandbox, &["/usr/lib64"])?;
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        sandbox.mount_bind(urandom, urandom, false)?;
        let dbus_socket = Path::new("/run/dbus/system_bus_socket");
        sandbox.mount_bind(dbus_socket, dbus_socket, true)?;

        Some(sandbox)
    } else {
        None
    };

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        backend,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
1090
/// Creates a video device of kind `typ` on `backend` and appends it to `devs`.
///
/// `video_tube` is passed to the device as its resource bridge. On failure `devs` is left
/// untouched and the creation error is returned.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    backend: VideoBackendType,
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> Result<()> {
    let video_stub = create_video_device(backend, cfg, typ, video_tube)?;
    devs.push(video_stub);
    Ok(())
}
1102
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001103fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001104 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann50f95912021-11-05 16:59:39 +01001105
1106 let device_file = match cfg
1107 .vhost_vsock_device
1108 .as_ref()
1109 .unwrap_or(&VhostVsockDeviceParameter::default())
1110 {
1111 VhostVsockDeviceParameter::Fd(fd) => {
1112 let fd = validate_raw_descriptor(*fd)
1113 .context("failed to validate fd for virtual socker device")?;
1114 // Safe because the `fd` is actually owned by this process and
1115 // we have a unique handle to it.
1116 unsafe { File::from_raw_fd(fd) }
1117 }
1118 VhostVsockDeviceParameter::Path(path) => OpenOptions::new()
1119 .read(true)
1120 .write(true)
1121 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
1122 .open(path)
1123 .context("failed to open virtual socket device")?,
1124 };
1125
1126 let dev = virtio::vhost::Vsock::new(device_file, features, cid)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001127 .context("failed to set up virtual socket device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001128
1129 Ok(VirtioDeviceStub {
1130 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001131 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001132 })
1133}
1134
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001135fn create_fs_device(
1136 cfg: &Config,
1137 uid_map: &str,
1138 gid_map: &str,
1139 src: &Path,
1140 tag: &str,
1141 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001142 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001143) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001144 let max_open_files =
1145 base::get_max_open_files().context("failed to get max number of open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001146 let j = if cfg.sandbox {
1147 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1148 let config = SandboxConfig {
1149 limit_caps: false,
1150 uid_map: Some(uid_map),
1151 gid_map: Some(gid_map),
1152 log_failures: cfg.seccomp_log_failures,
1153 seccomp_policy: &seccomp_policy,
1154 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001155 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1156 // We want bind mounts from the parent namespaces to propagate into the fs device's
1157 // namespace.
1158 jail.set_remount_mode(libc::MS_SLAVE);
1159
1160 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001161 } else {
1162 create_base_minijail(src, Some(max_open_files), None)?
1163 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001164
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001165 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001166 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1167 // when num_queues > 1.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001168 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1169 .context("failed to create fs device")?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001170
1171 Ok(VirtioDeviceStub {
1172 dev: Box::new(dev),
1173 jail: Some(j),
1174 })
1175}
1176
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001177fn create_9p_device(
1178 cfg: &Config,
1179 uid_map: &str,
1180 gid_map: &str,
1181 src: &Path,
1182 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001183 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001184) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001185 let max_open_files =
1186 base::get_max_open_files().context("failed to get max number of open files")?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001187 let (jail, root) = if cfg.sandbox {
1188 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1189 let config = SandboxConfig {
1190 limit_caps: false,
1191 uid_map: Some(uid_map),
1192 gid_map: Some(gid_map),
1193 log_failures: cfg.seccomp_log_failures,
1194 seccomp_policy: &seccomp_policy,
1195 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001196
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001197 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1198 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1199 // namespace.
1200 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001201
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001202 // The shared directory becomes the root of the device's file system.
1203 let root = Path::new("/");
1204 (Some(jail), root)
1205 } else {
1206 // There's no mount namespace so we tell the server to treat the source directory as the
1207 // root.
1208 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001209 };
1210
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001211 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001212 p9_cfg.root = root.into();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001213 let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001214
1215 Ok(VirtioDeviceStub {
1216 dev: Box::new(dev),
1217 jail,
1218 })
1219}
1220
Jakub Starona3411ea2019-04-24 10:55:25 -07001221fn create_pmem_device(
1222 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001223 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001224 resources: &mut SystemAllocator,
1225 disk: &DiskOption,
1226 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001227 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001228) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001229 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001230 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001231
1232 let (disk_size, arena_size) = {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001233 let metadata = std::fs::metadata(&disk.path).with_context(|| {
1234 format!("failed to get disk image {} metadata", disk.path.display())
1235 })?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001236 let disk_len = metadata.len();
1237 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1238 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1239 // we just align the size of the file to 2 MiB then access beyond the last page of the
1240 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1241 // padding up to 2 MiB.
1242 let alignment = 2 * 1024 * 1024;
1243 let align_adjust = if disk_len % alignment != 0 {
1244 alignment - (disk_len % alignment)
1245 } else {
1246 0
1247 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001248 (
1249 disk_len,
1250 disk_len
1251 .checked_add(align_adjust)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001252 .ok_or_else(|| anyhow!("pmem device image too big"))?,
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001253 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001254 };
1255
1256 let protection = {
1257 if disk.read_only {
1258 Protection::read()
1259 } else {
1260 Protection::read_write()
1261 }
1262 };
1263
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001264 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001265 // Conversion from u64 to usize may fail on 32bit system.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001266 let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1267 let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001268
Daniel Verkamp6b298582021-08-16 15:37:11 -07001269 let mut arena =
1270 MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001271 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001272 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001273 .context("failed to reserve pmem memory")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001274
1275 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1276 // of the page with zeroes. However, the anonymous mapping added below must start on a
1277 // page boundary, so round up the size before calculating the offset of the anon region.
1278 let disk_size = round_up_to_page_size(disk_size);
1279
1280 if arena_size > disk_size {
1281 // Add an anonymous region with the same protection as the disk mapping if the arena
1282 // size was aligned.
1283 arena
1284 .add_anon_protection(disk_size, arena_size - disk_size, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001285 .context("failed to reserve pmem padding")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001286 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001287 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001288 };
1289
1290 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001291 .mmio_allocator(MmioType::High)
Daniel Verkamp57e4f542021-10-28 09:56:40 -07001292 .reverse_allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001293 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001294 Alloc::PmemDevice(index),
1295 format!("pmem_disk_image_{}", index),
1296 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1297 128 * 1024 * 1024, /* 128 MiB */
1298 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001299 .context("failed to allocate memory for pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001300
Daniel Verkampe1980a92020-02-07 11:00:55 -08001301 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001302 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001303 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001304 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001305 /* read_only = */ disk.read_only,
1306 /* log_dirty_pages = */ false,
1307 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001308 .context("failed to add pmem device memory")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001309
Daniel Verkampe1980a92020-02-07 11:00:55 -08001310 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001311 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001312 fd,
1313 GuestAddress(mapping_address),
1314 slot,
1315 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001316 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001317 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001318 .context("failed to create pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001319
1320 Ok(VirtioDeviceStub {
1321 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001322 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001323 })
1324}
1325
Zide Chendfc4b882021-03-10 16:35:37 -08001326fn create_iommu_device(
1327 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001328 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001329 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1330) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001331 let dev = virtio::Iommu::new(
1332 virtio::base_features(cfg.protected_vm),
1333 endpoints,
1334 phys_max_addr,
1335 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001336 .context("failed to create IOMMU device")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001337
1338 Ok(VirtioDeviceStub {
1339 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001340 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001341 })
1342}
1343
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001344fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001345 let mut keep_rds = Vec::new();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001346 let evt = Event::new().context("failed to create event")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001347 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001348 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001349 .context("failed to create console device")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001350
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001351 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001352 Some(mut jail) => {
1353 // Create a tmpfs in the device's root directory so that we can bind mount the
1354 // log socket directory into it.
1355 // The size=67108864 is size=64*1024*1024 or size=64MB.
1356 jail.mount_with_data(
1357 Path::new("none"),
1358 Path::new("/"),
1359 "tmpfs",
1360 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1361 "size=67108864",
1362 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001363 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001364 let res = param.add_bind_mounts(&mut jail);
1365 if res.is_err() {
1366 error!("failed to add bind mounts for console device");
1367 }
1368 Some(jail)
1369 }
1370 None => None,
1371 };
1372
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001373 Ok(VirtioDeviceStub {
1374 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001375 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001376 })
1377}
1378
/// Builds the virtio sound device stub for `path`, jailed with the `"vios_audio_device"`
/// policy. Only compiled when the `audio` feature is enabled.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).context("failed to create sound device")?;
    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1389
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001390// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001391#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001392fn create_virtio_devices(
1393 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001394 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001395 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001396 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001397 wayland_device_tube: Tube,
1398 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001399 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001400 balloon_device_tube: Tube,
1401 disk_device_tubes: &mut Vec<Tube>,
1402 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001403 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001404 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001405) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001406 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001407
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001408 for (_, param) in cfg
1409 .serial_parameters
1410 .iter()
1411 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1412 {
1413 let dev = create_console_device(cfg, param)?;
1414 devs.push(dev);
1415 }
1416
Zach Reizner8fb52112017-12-13 16:04:39 -08001417 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001418 let disk_device_tube = disk_device_tubes.remove(0);
1419 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001420 }
1421
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001422 for blk in &cfg.vhost_user_blk {
1423 devs.push(create_vhost_user_block_device(cfg, blk)?);
1424 }
1425
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +09001426 for console in &cfg.vhost_user_console {
1427 devs.push(create_vhost_user_console_device(cfg, console)?);
1428 }
1429
Jakub Starona3411ea2019-04-24 10:55:25 -07001430 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001431 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001432 devs.push(create_pmem_device(
1433 cfg,
1434 vm,
1435 resources,
1436 pmem_disk,
1437 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001438 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001439 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001440 }
1441
David Tolnay2b089fc2019-03-04 15:33:22 -08001442 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001443
Woody Chow737ff122021-03-22 17:49:57 +09001444 #[cfg(feature = "audio_cras")]
1445 {
Chih-Yang Hsia41dc04f2021-12-08 16:04:23 +08001446 for cras_snd in &cfg.cras_snds {
Woody Chow0b2b6062021-09-03 15:40:02 +09001447 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
Woody Chow737ff122021-03-22 17:49:57 +09001448 }
1449 }
1450
David Tolnayde6b29a2018-12-20 11:49:46 -08001451 #[cfg(feature = "tpm")]
1452 {
David Tolnay43f8e212019-02-13 17:28:16 -08001453 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001454 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001455 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001456 }
1457
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001458 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
1459 devs.push(create_single_touch_device(
1460 cfg,
1461 single_touch_spec,
1462 idx as u32,
1463 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001464 }
1465
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001466 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
1467 devs.push(create_multi_touch_device(
1468 cfg,
1469 multi_touch_spec,
1470 idx as u32,
1471 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001472 }
1473
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001474 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
1475 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001476 }
1477
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001478 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
1479 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001480 }
1481
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001482 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
1483 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001484 }
1485
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001486 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
1487 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -08001488 }
1489
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001490 for dev_path in &cfg.virtio_input_evdevs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001491 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001492 }
1493
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001494 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001495
Zach Reizner39aa26b2017-12-12 18:03:23 -08001496 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001497 for tap_fd in &cfg.tap_fd {
Alexandre Courbot911773a2021-12-10 14:31:10 +09001498 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001499 }
1500
David Tolnay2b089fc2019-03-04 15:33:22 -08001501 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1502 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1503 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001504 if !cfg.vhost_user_net.is_empty() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001505 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
Keiichi Watanabe60686582021-03-12 04:53:51 +09001506 }
Alexandre Courbot911773a2021-12-10 14:31:10 +09001507 devs.push(create_net_device_from_config(
1508 cfg,
1509 host_ip,
1510 netmask,
1511 mac_address,
1512 )?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001513 }
1514
Alexandre Courbot993aa7f2021-12-09 14:51:29 +09001515 for tap_name in &cfg.tap_name {
1516 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
1517 }
1518
Keiichi Watanabe60686582021-03-12 04:53:51 +09001519 for net in &cfg.vhost_user_net {
1520 devs.push(create_vhost_user_net_device(cfg, net)?);
1521 }
1522
Chirantan Ekbote84091e52021-09-10 18:43:17 +09001523 for vsock in &cfg.vhost_user_vsock {
1524 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
1525 }
1526
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001527 for opt in &cfg.vhost_user_wl {
1528 devs.push(create_vhost_user_wl_device(cfg, opt)?);
1529 }
1530
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001531 #[cfg(feature = "gpu")]
1532 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
1533 devs.push(create_vhost_user_gpu_device(
1534 cfg,
1535 opt,
1536 host_tube,
1537 device_tube,
1538 )?);
1539 }
1540
David Tolnayfa701712019-02-13 16:42:54 -08001541 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001542 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001543
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001544 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001545 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001546 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001547
1548 #[cfg(feature = "gpu")]
1549 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001550 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001551 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001552 resource_bridges.push(gpu_socket);
1553 wl_resource_bridge = Some(wl_socket);
1554 }
1555 }
1556
1557 devs.push(create_wayland_device(
1558 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001559 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001560 wl_resource_bridge,
1561 )?);
1562 }
David Tolnayfa701712019-02-13 16:42:54 -08001563
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001564 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001565 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001566 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -07001567 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001568 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -07001569 } else {
1570 None
1571 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001572
1573 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001574 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001575 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -07001576 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001577 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -07001578 } else {
1579 None
1580 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001581
Zach Reizner3a8100a2017-09-13 19:15:43 -07001582 #[cfg(feature = "gpu")]
1583 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001584 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Jason Macnakd659a0d2021-03-15 15:33:01 -07001585 let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
1586 let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
1587 if !gpu_parameters.displays.is_empty() {
1588 gpu_display_w = gpu_parameters.displays[0].width;
1589 gpu_display_h = gpu_parameters.displays[0].height;
1590 }
1591
Zach Reizner65b98f12019-11-22 17:34:58 -08001592 let mut event_devices = Vec::new();
1593 if cfg.display_window_mouse {
1594 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001595 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001596 let (multi_touch_width, multi_touch_height) = cfg
1597 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001598 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001599 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001600 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -07001601 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001602 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001603 // u32::MAX is the least likely to collide with the indices generated above for
1604 // the multi_touch options, which begin at 0.
1605 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001606 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001607 multi_touch_width,
1608 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001609 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001610 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001611 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001612 devs.push(VirtioDeviceStub {
1613 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001614 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001615 });
1616 event_devices.push(EventDevice::touchscreen(event_device_socket));
1617 }
1618 if cfg.display_window_keyboard {
1619 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001620 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001621 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001622 // u32::MAX is the least likely to collide with the indices generated above for
1623 // the multi_touch options, which begin at 0.
1624 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001625 virtio_dev_socket,
1626 virtio::base_features(cfg.protected_vm),
1627 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001628 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001629 devs.push(VirtioDeviceStub {
1630 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001631 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001632 });
1633 event_devices.push(EventDevice::keyboard(event_device_socket));
1634 }
Chia-I Wu16fb6592021-11-10 11:45:32 -08001635
1636 let mut render_server_fd = None;
1637 if let Some(ref render_server_parameters) = gpu_parameters.render_server {
1638 render_server_fd = Some(start_gpu_render_server(cfg, render_server_parameters)?);
1639 }
1640
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001641 devs.push(create_gpu_device(
1642 cfg,
1643 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001644 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001645 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001646 // Use the unnamed socket for GPU display screens.
1647 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001648 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -08001649 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -08001650 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001651 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001652 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001653 }
1654 }
1655
Daniel Verkampffb59122021-03-18 14:06:15 -07001656 #[cfg(feature = "video-decoder")]
1657 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001658 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -07001659 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001660 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -07001661 &mut devs,
1662 video_dec_tube,
1663 cfg,
1664 devices::virtio::VideoDeviceType::Decoder,
1665 )?;
1666 }
1667 }
1668
1669 #[cfg(feature = "video-encoder")]
1670 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001671 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -07001672 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001673 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -07001674 &mut devs,
1675 video_enc_tube,
1676 cfg,
1677 devices::virtio::VideoDeviceType::Encoder,
1678 )?;
1679 }
1680 }
1681
Zach Reizneraa575662018-08-15 10:46:32 -07001682 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001683 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001684 }
1685
Woody Chow5890b702021-02-12 14:57:02 +09001686 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001687 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +09001688 }
1689
Woody Chow1b16db12021-04-02 16:59:59 +09001690 #[cfg(feature = "audio")]
1691 for vhost_user_snd in &cfg.vhost_user_snd {
1692 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
1693 }
1694
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001695 for shared_dir in &cfg.shared_dirs {
1696 let SharedDir {
1697 src,
1698 tag,
1699 kind,
1700 uid_map,
1701 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001702 fs_cfg,
1703 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001704 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001705
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001706 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001707 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001708 let device_tube = fs_device_tubes.remove(0);
1709 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001710 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001711 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001712 };
1713 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001714 }
1715
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001716 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
1717 devs.push(create_vhost_user_mac80211_hwsim_device(
1718 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001719 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001720 )?);
1721 }
1722
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001723 #[cfg(feature = "audio")]
1724 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001725 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001726 }
1727
David Tolnay2b089fc2019-03-04 15:33:22 -08001728 Ok(devs)
1729}
1730
Xiong Zhang10f15052021-04-08 17:23:33 +08001731fn create_vfio_device(
1732 cfg: &Config,
1733 vm: &impl Vm,
1734 resources: &mut SystemAllocator,
1735 control_tubes: &mut Vec<TaggedControlTube>,
1736 vfio_path: &Path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001737 bus_num: Option<u8>,
Zide Chendfc4b882021-03-10 16:35:37 -08001738 endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1739 iommu_enabled: bool,
Xiong Zhang10f15052021-04-08 17:23:33 +08001740) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Zide Chendfc4b882021-03-10 16:35:37 -08001741 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001742 .context("failed to get vfio container")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001743
1744 // create MSI, MSI-X, and Mem request sockets for each vfio device
Daniel Verkamp6b298582021-08-16 15:37:11 -07001745 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1746 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001747 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1748
Daniel Verkamp6b298582021-08-16 15:37:11 -07001749 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1750 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001751 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1752
Daniel Verkamp6b298582021-08-16 15:37:11 -07001753 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1754 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001755 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1756
Keiichi Watanabe7b805542021-09-03 02:13:51 +09001757 let vfio_device =
1758 VfioDevice::new_passthrough(&vfio_path, vm, vfio_container.clone(), iommu_enabled)
1759 .context("failed to create vfio device")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001760 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1761 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001762 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001763 vfio_device_tube_msi,
1764 vfio_device_tube_msix,
1765 vfio_device_tube_mem,
1766 ));
1767 // early reservation for pass-through PCI devices.
Zide Chendfc4b882021-03-10 16:35:37 -08001768 let endpoint_addr = vfio_pci_device.allocate_address(resources);
1769 if endpoint_addr.is_err() {
Xiong Zhang10f15052021-04-08 17:23:33 +08001770 warn!(
1771 "address reservation failed for vfio {}",
1772 vfio_pci_device.debug_label()
1773 );
1774 }
1775
Zide Chendfc4b882021-03-10 16:35:37 -08001776 if iommu_enabled {
1777 endpoints.insert(endpoint_addr.unwrap().to_u32(), vfio_container);
1778 }
1779
Xiong Zhang10f15052021-04-08 17:23:33 +08001780 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1781}
1782
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001783fn create_vfio_platform_device(
1784 cfg: &Config,
1785 vm: &impl Vm,
1786 _resources: &mut SystemAllocator,
1787 control_tubes: &mut Vec<TaggedControlTube>,
1788 vfio_path: &Path,
1789 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1790 iommu_enabled: bool,
1791) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
1792 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001793 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001794
Daniel Verkamp6b298582021-08-16 15:37:11 -07001795 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1796 Tube::pair().context("failed to create tube")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001797 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1798
Keiichi Watanabe7b805542021-09-03 02:13:51 +09001799 let vfio_device = VfioDevice::new_passthrough(&vfio_path, vm, vfio_container, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001800 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001801 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1802
1803 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1804}
1805
David Tolnay2b089fc2019-03-04 15:33:22 -08001806fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001807 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001808 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001809 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001810 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001811 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001812 control_tubes: &mut Vec<TaggedControlTube>,
1813 wayland_device_tube: Tube,
1814 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001815 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001816 balloon_device_tube: Tube,
1817 disk_device_tubes: &mut Vec<Tube>,
1818 pmem_device_tubes: &mut Vec<Tube>,
1819 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001820 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001821 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001822) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001823 let stubs = create_virtio_devices(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001824 cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001825 vm,
1826 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001827 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001828 wayland_device_tube,
1829 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001830 vhost_user_gpu_tubes,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001831 balloon_device_tube,
1832 disk_device_tubes,
1833 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001834 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001835 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001836 )?;
1837
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001838 let mut devices = Vec::new();
David Tolnay2b089fc2019-03-04 15:33:22 -08001839
1840 for stub in stubs {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001841 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001842 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001843 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001844 .context("failed to create virtio pci dev")?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001845 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
1846 devices.push((dev, stub.jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001847 }
1848
Andrew Scull1590e6f2020-03-18 18:00:47 +00001849 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001850 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001851 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
Daniel Verkamp6b298582021-08-16 15:37:11 -07001852 .context("failed to create ac97 device")?;
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001853 let jail = simple_jail(cfg, dev.minijail_policy())?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001854 devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001855 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001856
Daniel Verkampf1439d42021-05-21 13:55:10 -07001857 #[cfg(feature = "usb")]
1858 {
1859 // Create xhci controller.
1860 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001861 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
Daniel Verkampf1439d42021-05-21 13:55:10 -07001862 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001863
Zide Chen5deee482021-04-19 11:06:01 -07001864 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001865 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1866 BTreeMap::new();
1867
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001868 for vfio_dev in cfg
1869 .vfio
1870 .iter()
1871 .filter(|dev| dev.get_type() == VfioType::Pci)
1872 {
1873 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001874 let (vfio_pci_device, jail) = create_vfio_device(
1875 cfg,
1876 vm,
1877 resources,
1878 control_tubes,
1879 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001880 None,
Zide Chendfc4b882021-03-10 16:35:37 -08001881 &mut iommu_attached_endpoints,
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001882 vfio_dev.iommu_enabled(),
Zide Chen5deee482021-04-19 11:06:01 -07001883 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001884
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001885 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07001886 }
Zide Chendfc4b882021-03-10 16:35:37 -08001887
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001888 for vfio_dev in cfg
1889 .vfio
1890 .iter()
1891 .filter(|dev| dev.get_type() == VfioType::Platform)
1892 {
1893 let vfio_path = &vfio_dev.vfio_path;
1894 let (vfio_plat_dev, jail) = create_vfio_platform_device(
1895 cfg,
1896 vm,
1897 resources,
1898 control_tubes,
1899 vfio_path.as_path(),
1900 &mut iommu_attached_endpoints,
1901 false, // Virtio IOMMU is not supported yet
1902 )?;
1903
1904 devices.push((Box::new(vfio_plat_dev), jail));
1905 }
1906
Zide Chendfc4b882021-03-10 16:35:37 -08001907 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08001908 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001909
Daniel Verkamp6b298582021-08-16 15:37:11 -07001910 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001911 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07001912 let mut dev =
1913 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001914 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07001915 // early reservation for viommu.
1916 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001917 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07001918 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001919 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08001920 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001921 }
1922
Mattias Nisslerde2c6402021-10-21 12:05:29 +00001923 for params in &cfg.stub_pci_devices {
1924 // Stub devices don't need jailing since they don't do anything.
1925 devices.push((Box::new(StubPciDevice::new(params)), None));
1926 }
1927
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001928 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08001929}
1930
1931#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001932#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001933struct Ids {
1934 uid: uid_t,
1935 gid: gid_t,
1936}
1937
David Tolnay48c48292019-03-01 16:54:25 -08001938// Set the uid/gid for the jailed process and give a basic id map. This is
1939// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10001940fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
1941 let crosvm_uid = geteuid();
1942 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08001943
David Tolnay48c48292019-03-01 16:54:25 -08001944 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001945 .context("error setting UID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08001946 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001947 .context("error setting GID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08001948
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09001949 if crosvm_uid != 0 {
1950 jail.change_uid(crosvm_uid);
1951 }
1952 if crosvm_gid != 0 {
1953 jail.change_gid(crosvm_gid);
1954 }
Fergus Dall51200512021-08-19 12:54:26 +10001955
David Tolnay41a6f842019-03-01 16:18:44 -08001956 Ok(Ids {
1957 uid: crosvm_uid,
1958 gid: crosvm_gid,
1959 })
David Tolnay48c48292019-03-01 16:54:25 -08001960}
1961
Chia-I Wu16fb6592021-11-10 11:45:32 -08001962fn add_current_user_as_root_to_jail(jail: &mut Minijail) -> Result<Ids> {
1963 let crosvm_uid = geteuid();
1964 let crosvm_gid = getegid();
1965 jail.uidmap(&format!("0 {0} 1", crosvm_uid))
1966 .context("error setting UID map")?;
1967 jail.gidmap(&format!("0 {0} 1", crosvm_gid))
1968 .context("error setting GID map")?;
1969
1970 Ok(Ids {
1971 uid: crosvm_uid,
1972 gid: crosvm_gid,
1973 })
1974}
1975
/// Conversion of a value into a `UnixStream`.
///
/// Implemented in this file for filesystem paths (which are opened or connected to) and for
/// `UnixStream` itself (which is passed through unchanged).
trait IntoUnixStream {
    /// Consumes `self` and produces the corresponding `UnixStream`, or an error if one could
    /// not be obtained.
    fn into_unix_stream(self) -> Result<UnixStream>;
}
1979
1980impl<'a> IntoUnixStream for &'a Path {
1981 fn into_unix_stream(self) -> Result<UnixStream> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001982 if let Some(fd) = safe_descriptor_from_path(self).context("failed to open event device")? {
Andrew Walbranbc55e302021-07-13 17:35:10 +01001983 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08001984 } else {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001985 UnixStream::connect(self).context("failed to open event device")
Zach Reizner65b98f12019-11-22 17:34:58 -08001986 }
1987 }
1988}
1989impl<'a> IntoUnixStream for &'a PathBuf {
1990 fn into_unix_stream(self) -> Result<UnixStream> {
1991 self.as_path().into_unix_stream()
1992 }
1993}
1994
impl IntoUnixStream for UnixStream {
    /// Identity conversion: an existing stream is returned as-is and never fails.
    fn into_unix_stream(self) -> Result<UnixStream> {
        Ok(self)
    }
}
2000
Steven Richmanf32d0b42020-06-20 21:45:32 -07002001fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
2002 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07002003 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002004 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07002005 // Our signal handler does nothing and is trivially async signal safe.
2006 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002007 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002008 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002009 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002010 } else {
2011 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002012 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002013 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07002014 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002015 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002016 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002017 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002018 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002019 Ok(())
2020}
2021
Steven Richmanf32d0b42020-06-20 21:45:32 -07002022// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07002023fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002024 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002025 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002026 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002027 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07002028 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002029 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002030 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002031 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002032 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002033 has_bios: bool,
2034 use_hypervisor_signals: bool,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002035 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002036 host_cpu_topology: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07002037) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07002038where
Zach Reizner2c770e62020-09-30 16:49:59 -07002039 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002040{
Zach Reizner304e7312020-09-29 16:00:24 -07002041 let mut vcpu = match vcpu {
2042 Some(v) => v,
2043 None => {
2044 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
2045 // the vcpu thread.
2046 match vm
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002047 .create_vcpu(kvm_vcpu_id)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002048 .context("failed to create vcpu")?
Zach Reizner304e7312020-09-29 16:00:24 -07002049 .downcast::<V>()
2050 {
2051 Ok(v) => *v,
2052 Err(_) => panic!("VM created wrong type of VCPU"),
2053 }
2054 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002055 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002056
Steven Richmanf32d0b42020-06-20 21:45:32 -07002057 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07002058 .add_vcpu(cpu_id, &vcpu)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002059 .context("failed to add vcpu to irq chip")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002060
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07002061 if !vcpu_affinity.is_empty() {
2062 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
2063 error!("Failed to set CPU affinity: {}", e);
2064 }
2065 }
2066
Steven Richmanf32d0b42020-06-20 21:45:32 -07002067 Arch::configure_vcpu(
2068 vm.get_memory(),
2069 vm.get_hypervisor(),
2070 irq_chip,
2071 &mut vcpu,
2072 cpu_id,
2073 vcpu_count,
2074 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002075 no_smt,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002076 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002077 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002078 .context("failed to configure vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002079
Yusuke Sato31e136a2021-08-18 11:51:38 -07002080 if !enable_per_vm_core_scheduling {
2081 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
2082 if let Err(e) = enable_core_scheduling() {
2083 error!("Failed to enable core scheduling: {}", e);
2084 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002085 }
2086
Kansho Nishidaab205af2020-08-13 18:17:50 +09002087 if run_rt {
2088 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2089 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
2090 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
2091 {
2092 warn!("Failed to set vcpu to real time: {}", e);
2093 }
2094 }
2095
Steven Richmanf32d0b42020-06-20 21:45:32 -07002096 if use_hypervisor_signals {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002097 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002098 v.retain(|&x| x != SIGRTMIN() + 0);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002099 vcpu.set_signal_mask(&v)
2100 .context("failed to set the signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002101 }
2102
Zach Reizner2c770e62020-09-30 16:49:59 -07002103 let vcpu_run_handle = vcpu
2104 .take_run_handle(Some(SIGRTMIN() + 0))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002105 .context("failed to set thread id for vcpu")?;
Zach Reizner2c770e62020-09-30 16:49:59 -07002106
2107 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002108}
2109
/// Executes one GDB debug request against `vcpu` and sends the outcome on `reply_tube`.
///
/// Every request produces exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`:
/// register and memory reads reply with the data read, all other requests reply with
/// `CommandComplete`. A failed memory read is not treated as an error; it is answered with an
/// empty memory region. Any other failing operation returns its error without sending a reply.
///
/// Returns an error if the operation fails (other than a memory read) or if the reply cannot
/// be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // First perform the requested operation and work out what to report back...
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu)
                .context("failed to handle a gdb ReadRegs command")?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            // A failed read is answered with an empty region instead of an error.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            VcpuDebugStatus::CommandComplete
        }
    };

    // ...then send the single reply for this request.
    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .context("failed to send a debug status to GDB thread")
}
2188
/// Spawns the thread that sets up and runs one vcpu, returning the thread's join handle.
///
/// The thread is named `crosvm_vcpu{cpu_id}`. Inside the thread:
///
/// 1. The vcpu is prepared with `runnable_vcpu`. Real-time priority is applied during that
///    setup only when `run_rt` is set and `delay_rt` is not; with `delay_rt`, it is applied
///    later when a `VcpuControl::MakeRT` message arrives.
/// 2. The thread waits on `start_barrier`. This happens before the setup result is inspected,
///    so a vcpu whose setup failed still reaches the barrier before its thread returns.
/// 3. The main loop alternates between processing `VcpuControl` messages from
///    `from_main_tube` and running the vcpu, servicing port I/O exits through `io_bus` and
///    MMIO exits through `mmio_bus`.
///
/// `exit_evt` is wrapped in a `ScopedEvent` for the lifetime of the thread closure.
///
/// Returns an error only if the thread could not be spawned; failures inside the thread are
/// logged there.
fn run_vcpu<V>(
    cpu_id: usize,
    kvm_vcpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch + 'static,
    mut irq_chip: Box<dyn IrqChipArch + 'static>,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    delay_rt: bool,
    no_smt: bool,
    start_barrier: Arc<Barrier>,
    has_bios: bool,
    mut io_bus: devices::Bus,
    mut mmio_bus: devices::Bus,
    exit_evt: Event,
    requires_pvclock_ctrl: bool,
    from_main_tube: mpsc::Receiver<VcpuControl>,
    use_hypervisor_signals: bool,
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
        mpsc::Sender<VcpuDebugStatusMessage>,
    >,
    enable_per_vm_core_scheduling: bool,
    host_cpu_topology: bool,
) -> Result<JoinHandle<()>>
where
    V: VcpuArch + 'static,
{
    thread::Builder::new()
        .name(format!("crosvm_vcpu{}", cpu_id))
        .spawn(move || {
            // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
            // implementation accomplishes that.
            let _scoped_exit_evt = ScopedEvent::from(exit_evt);

            // `vm` is moved into `runnable_vcpu` below, so grab the guest memory handle first.
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            let guest_mem = vm.get_memory().clone();
            let runnable_vcpu = runnable_vcpu(
                cpu_id,
                kvm_vcpu_id,
                vcpu,
                vm,
                irq_chip.as_mut(),
                vcpu_count,
                run_rt && !delay_rt,
                vcpu_affinity,
                no_smt,
                has_bios,
                use_hypervisor_signals,
                enable_per_vm_core_scheduling,
                host_cpu_topology,
            );

            // Wait on the barrier before looking at the setup result, so that this thread
            // reaches the barrier even when setup failed.
            start_barrier.wait();

            let (vcpu, vcpu_run_handle) = match runnable_vcpu {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to start vcpu {}: {:#}", cpu_id, e);
                    return;
                }
            };

            let mut run_mode = VmRunMode::Running;
            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
            if to_gdb_tube.is_some() {
                // Wait until a GDB client attaches
                run_mode = VmRunMode::Breakpoint;
            }

            let mut interrupted_by_signal = false;

            // Tag both buses with this vcpu's id.
            mmio_bus.set_access_id(cpu_id);
            io_bus.set_access_id(cpu_id);

            'vcpu_loop: loop {
                // Start by checking for messages to process and the run state of the CPU.
                // An extra check here for Running so there isn't a need to call recv unless a
                // message is likely to be ready because a signal was sent.
                if interrupted_by_signal || run_mode != VmRunMode::Running {
                    'state_loop: loop {
                        // Tries to get a pending message without blocking first.
                        let msg = match from_main_tube.try_recv() {
                            Ok(m) => m,
                            Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
                                // If the VM is running and no message is pending, the state won't
                                // change.
                                break 'state_loop;
                            }
                            Err(mpsc::TryRecvError::Empty) => {
                                // If the VM is not running, wait until a message is ready.
                                match from_main_tube.recv() {
                                    Ok(m) => m,
                                    Err(mpsc::RecvError) => {
                                        error!("Failed to read from main tube in vcpu");
                                        break 'vcpu_loop;
                                    }
                                }
                            }
                            Err(mpsc::TryRecvError::Disconnected) => {
                                error!("Failed to read from main tube in vcpu");
                                break 'vcpu_loop;
                            }
                        };

                        // Collect all pending messages.
                        let mut messages = vec![msg];
                        messages.append(&mut from_main_tube.try_iter().collect());

                        for msg in messages {
                            match msg {
                                VcpuControl::RunState(new_mode) => {
                                    run_mode = new_mode;
                                    match run_mode {
                                        // NOTE(review): leaving the state loop here also drops
                                        // any messages collected after this one in `messages`
                                        // — confirm that is intended.
                                        VmRunMode::Running => break 'state_loop,
                                        VmRunMode::Suspending => {
                                            // On KVM implementations that use a paravirtualized
                                            // clock (e.g. x86), a flag must be set to indicate to
                                            // the guest kernel that a vCPU was suspended. The guest
                                            // kernel will use this flag to prevent the soft lockup
                                            // detection from triggering when this vCPU resumes,
                                            // which could happen days later in realtime.
                                            if requires_pvclock_ctrl {
                                                if let Err(e) = vcpu.pvclock_ctrl() {
                                                    error!(
                                                        "failed to tell hypervisor vcpu {} is suspending: {}",
                                                        cpu_id, e
                                                    );
                                                }
                                            }
                                        }
                                        VmRunMode::Breakpoint => {}
                                        VmRunMode::Exiting => break 'vcpu_loop,
                                    }
                                }
                                #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                                VcpuControl::Debug(d) => {
                                    match &to_gdb_tube {
                                        Some(ref ch) => {
                                            if let Err(e) = handle_debug_msg(
                                                cpu_id, &vcpu, &guest_mem, d, ch,
                                            ) {
                                                error!("Failed to handle gdb message: {}", e);
                                            }
                                        },
                                        None => {
                                            error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
                                        }
                                    }
                                }
                                VcpuControl::MakeRT => {
                                    // Deferred real-time promotion: only acts when RT was
                                    // requested but postponed at setup time.
                                    if run_rt && delay_rt {
                                        info!("Making vcpu {} RT\n", cpu_id);
                                        const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
                                        if let Err(e) = set_rt_prio_limit(
                                            u64::from(DEFAULT_VCPU_RT_LEVEL))
                                            .and_then(|_|
                                                set_rt_round_robin(
                                                    i32::from(DEFAULT_VCPU_RT_LEVEL)
                                                ))
                                        {
                                            warn!("Failed to set vcpu to real time: {}", e);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                interrupted_by_signal = false;

                // Vcpus may have run a HLT instruction, which puts them into a state other than
                // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
                // until either the irqchip receives an interrupt for this vcpu, or until the main
                // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
                // implementations HLT instructions do not make it to crosvm, and thus this is a
                // no-op that always returns VcpuRunState::Runnable.
                match irq_chip.wait_until_runnable(&vcpu) {
                    Ok(VcpuRunState::Runnable) => {}
                    Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
                    Err(e) => error!(
                        "error waiting for vcpu {} to become runnable: {}",
                        cpu_id, e
                    ),
                }

                if !interrupted_by_signal {
                    // A bare `break` inside this match leaves 'vcpu_loop, which ends the thread.
                    match vcpu.run(&vcpu_run_handle) {
                        Ok(VcpuExit::IoIn { port, mut size }) => {
                            let mut data = [0; 8];
                            // Oversized accesses are logged and clamped to the buffer size.
                            if size > data.len() {
                                error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
                                size = data.len();
                            }
                            io_bus.read(port as u64, &mut data[..size]);
                            if let Err(e) = vcpu.set_data(&data[..size]) {
                                error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
                            }
                        }
                        Ok(VcpuExit::IoOut {
                            port,
                            mut size,
                            data,
                        }) => {
                            // Oversized accesses are logged and clamped to the data length.
                            if size > data.len() {
                                error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
                                size = data.len();
                            }
                            io_bus.write(port as u64, &data[..size]);
                        }
                        Ok(VcpuExit::MmioRead { address, size }) => {
                            // NOTE(review): unlike the IoIn path, `size` is not clamped here;
                            // slicing panics if it exceeds 8 — presumably the hypervisor never
                            // reports more, confirm.
                            let mut data = [0; 8];
                            mmio_bus.read(address, &mut data[..size]);
                            // Setting data for mmio can not fail.
                            let _ = vcpu.set_data(&data[..size]);
                        }
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data,
                        }) => {
                            mmio_bus.write(address, &data[..size]);
                        }
                        Ok(VcpuExit::IoapicEoi { vector }) => {
                            if let Err(e) = irq_chip.broadcast_eoi(vector) {
                                error!(
                                    "failed to broadcast eoi {} on vcpu {}: {}",
                                    vector, cpu_id, e
                                );
                            }
                        }
                        Ok(VcpuExit::IrqWindowOpen) => {}
                        Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
                        Ok(VcpuExit::Shutdown) => break,
                        Ok(VcpuExit::FailEntry {
                            hardware_entry_failure_reason,
                        }) => {
                            error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
                            break;
                        }
                        Ok(VcpuExit::SystemEvent(_, _)) => break,
                        Ok(VcpuExit::Debug { .. }) => {
                            // Without the gdb feature this exit is ignored.
                            #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
                            {
                                let msg = VcpuDebugStatusMessage {
                                    cpu: cpu_id as usize,
                                    msg: VcpuDebugStatus::HitBreakPoint,
                                };
                                if let Some(ref ch) = to_gdb_tube {
                                    if let Err(e) = ch.send(msg) {
                                        error!("failed to notify breakpoint to GDB thread: {}", e);
                                        break;
                                    }
                                }
                                // Park in the state loop until the main thread changes the mode.
                                run_mode = VmRunMode::Breakpoint;
                            }
                        }
                        Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                        Err(e) => match e.errno() {
                            // EINTR is treated as a kick: handle pending messages next iteration.
                            libc::EINTR => interrupted_by_signal = true,
                            libc::EAGAIN => {}
                            _ => {
                                error!("vcpu hit unknown error: {}", e);
                                break;
                            }
                        },
                    }
                }

                if interrupted_by_signal {
                    if use_hypervisor_signals {
                        // Try to clear the signal that we use to kick VCPU if it is pending before
                        // attempting to handle pause requests.
                        if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                            error!("failed to clear pending signal: {}", e);
                            break;
                        }
                    } else {
                        // Reset the immediate-exit flag on the vcpu.
                        vcpu.set_immediate_exit(false);
                    }
                }

                if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
                    error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
                }
            }
        })
        .context("failed to spawn VCPU thread")
}
2479
Zach Reiznera90649a2021-03-31 12:56:08 -07002480fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002481 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002482 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002483 open_file(
2484 initrd_path,
2485 true, /*read_only*/
2486 false, /*O_DIRECT*/
2487 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002488 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002489 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002490 } else {
2491 None
2492 };
2493
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002494 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002495 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002496 open_file(
2497 kernel_path,
2498 true, /*read_only*/
2499 false, /*O_DIRECT*/
2500 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002501 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002502 ),
2503 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002504 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002505 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002506 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002507 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2508 };
2509
Will Deaconc48e7832021-07-30 19:03:06 +01002510 let swiotlb = if let Some(size) = cfg.swiotlb {
2511 Some(
2512 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002513 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002514 )
2515 } else {
2516 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +00002517 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
2518 Some(64 * 1024 * 1024)
2519 }
Will Deaconc48e7832021-07-30 19:03:06 +01002520 ProtectionType::Unprotected => None,
2521 }
2522 };
2523
Zach Reiznera90649a2021-03-31 12:56:08 -07002524 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002525 memory_size: cfg
2526 .memory
2527 .unwrap_or(256)
2528 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002529 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002530 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002531 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002532 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002533 cpu_clusters: cfg.cpu_clusters.clone(),
2534 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002535 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002536 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002537 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002538 android_fstab: cfg
2539 .android_fstab
2540 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002541 .map(|x| {
2542 File::open(x)
2543 .with_context(|| format!("failed to open android fstab file {}", x.display()))
2544 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002545 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002546 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002547 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002548 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002549 acpi_sdts: cfg
2550 .acpi_tables
2551 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002552 .map(|path| {
2553 SDT::from_file(path)
2554 .with_context(|| format!("failed to open ACPI file {}", path.display()))
2555 })
Tomasz Jeznach42644642020-05-20 23:27:59 -07002556 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002557 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002558 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002559 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002560 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002561 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002562 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002563 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002564 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -07002565 })
2566}
2567
/// How a VM run ended, as returned by `run_config`.
pub enum ExitState {
    /// NOTE(review): presumably the VM asked to be reset/restarted — confirm against `run_vm`.
    Reset,
    /// NOTE(review): presumably the VM stopped and should not be restarted — confirm against
    /// `run_vm`.
    Stop,
}
2572
2573pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -07002574 let components = setup_vm_components(&cfg)?;
2575
2576 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002577 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
2578 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002579 let mut mem_policy = MemoryPolicy::empty();
2580 if components.hugepages {
2581 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2582 }
Quentin Perret26203802021-12-02 09:48:43 +00002583 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002584 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +00002585 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Daniel Verkamp6b298582021-08-16 15:37:11 -07002586 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002587
2588 enum KvmIrqChip {
2589 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2590 Split(KvmSplitIrqChip),
2591 Kernel(KvmKernelIrqChip),
2592 }
2593
2594 impl KvmIrqChip {
2595 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2596 match self {
2597 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2598 KvmIrqChip::Split(i) => i,
2599 KvmIrqChip::Kernel(i) => i,
2600 }
2601 }
2602 }
2603
2604 let ioapic_host_tube;
2605 let mut irq_chip = if cfg.split_irqchip {
2606 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2607 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2608 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2609 {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002610 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002611 ioapic_host_tube = Some(host_tube);
2612 KvmIrqChip::Split(
2613 KvmSplitIrqChip::new(
2614 vm_clone,
2615 components.vcpu_count,
2616 ioapic_device_tube,
2617 Some(120),
2618 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002619 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002620 )
2621 }
2622 } else {
2623 ioapic_host_tube = None;
2624 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002625 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
2626 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002627 )
2628 };
2629
2630 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2631}
2632
2633fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002634 cfg: Config,
2635 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002636 mut vm: V,
2637 irq_chip: &mut dyn IrqChipArch,
2638 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002639) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07002640where
2641 Vcpu: VcpuArch + 'static,
2642 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002643{
2644 if cfg.sandbox {
2645 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2646 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2647 // access to those files will not be possible.
2648 info!("crosvm entering multiprocess mode");
2649 }
2650
Daniel Verkampf1439d42021-05-21 13:55:10 -07002651 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002652 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002653 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002654
Zach Reiznera90649a2021-03-31 12:56:08 -07002655 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2656 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2657 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002658 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002659
Zach Reiznera60744b2019-02-13 17:33:32 -08002660 let control_server_socket = match &cfg.socket_path {
2661 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002662 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002663 )),
2664 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002665 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002666
Zach Reiznera90649a2021-03-31 12:56:08 -07002667 let mut control_tubes = Vec::new();
2668
2669 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2670 if let Some(port) = cfg.gdb {
2671 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002672 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07002673 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2674 components.gdb = Some((port, gdb_control_tube));
2675 }
2676
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002677 for wl_cfg in &cfg.vhost_user_wl {
2678 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2679 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002680 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002681 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2682 }
2683
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002684 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2685 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002686 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002687 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -07002688 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002689 device_tube,
2690 ));
2691 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2692 }
2693
Daniel Verkamp6b298582021-08-16 15:37:11 -07002694 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002695 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002696 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002697 let (balloon_host_tube, balloon_device_tube) = Tube::pair().context("failed to create tube")?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002698 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2699 balloon_host_tube
2700 .set_recv_timeout(Some(Duration::from_millis(100)))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002701 .context("failed to create tube")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002702
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002703 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002704 let mut disk_device_tubes = Vec::new();
2705 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002706 let disk_count = cfg.disks.len();
2707 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002708 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002709 disk_host_tubes.push(disk_host_tub);
2710 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002711 }
2712
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002713 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002714 let pmem_count = cfg.pmem_devices.len();
2715 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002716 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002717 pmem_device_tubes.push(pmem_device_tube);
2718 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002719 }
2720
Daniel Verkamp6b298582021-08-16 15:37:11 -07002721 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002722 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002723
Zach Reiznerdc748482021-04-14 13:59:30 -07002724 if let Some(ioapic_host_tube) = ioapic_host_tube {
2725 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2726 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002727
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002728 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002729 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002730 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002731 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002732 Some(mut jail) => {
2733 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2734 #[cfg(feature = "power-monitor-powerd")]
2735 {
Fergus Dall51200512021-08-19 12:54:26 +10002736 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09002737
2738 // Create a tmpfs in the device's root directory so that we can bind mount files.
2739 jail.mount_with_data(
2740 Path::new("none"),
2741 Path::new("/"),
2742 "tmpfs",
2743 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2744 "size=67108864",
2745 )?;
2746
2747 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2748 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2749 }
2750 Some(jail)
2751 }
2752 None => None,
2753 };
2754 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002755 } else {
2756 (&cfg.battery_type, None)
2757 };
2758
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002759 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2760
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002761 let fs_count = cfg
2762 .shared_dirs
2763 .iter()
2764 .filter(|sd| sd.kind == SharedDirKind::FS)
2765 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002766 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002767 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002768 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002769 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2770 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002771 }
2772
Daniel Verkamp6b298582021-08-16 15:37:11 -07002773 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002774 let reset_evt = Event::new().context("failed to create event")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002775 let mut sys_allocator = Arch::create_system_allocator(vm.get_memory());
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002776
2777 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
2778 let ramoops_region = match &components.pstore {
2779 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07002780 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002781 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002782 ),
2783 None => None,
2784 };
2785
Zide Chen71435c12021-03-03 15:02:02 -08002786 let phys_max_addr = Arch::get_phys_max_addr();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002787 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07002788 &cfg,
2789 &mut vm,
2790 &mut sys_allocator,
2791 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08002792 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07002793 &mut control_tubes,
2794 wayland_device_tube,
2795 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002796 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07002797 balloon_device_tube,
2798 &mut disk_device_tubes,
2799 &mut pmem_device_tubes,
2800 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002801 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002802 usb_provider,
2803 Arc::clone(&map_request),
2804 )?;
2805
Peter Fangc2bba082021-04-19 18:40:24 -07002806 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002807 for device in devices
2808 .iter_mut()
2809 .filter_map(|(dev, _)| dev.as_pci_device_mut())
2810 {
Peter Fangc2bba082021-04-19 18:40:24 -07002811 let sdts = device
2812 .generate_acpi(components.acpi_sdts)
2813 .or_else(|| {
2814 error!("ACPI table generation error");
2815 None
2816 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002817 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07002818 components.acpi_sdts = sdts;
2819 }
2820
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002821 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
2822 let mut kvm_vcpu_ids = Vec::new();
2823
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002824 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002825 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002826 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002827 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002828 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07002829 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002830 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002831 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002832 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002833 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002834 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002835 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07002836 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002837 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06002838 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002839 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08002840
Daniel Verkamp1286b482021-11-30 15:14:16 -08002841 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2842 {
2843 // Create Pcie Root Port
2844 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
2845 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2846 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
2847 let sec_bus = (1..255)
2848 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
2849 .context("failed to find empty bus for Pci hotplug")?;
2850 let pci_bridge = Box::new(PciBridge::new(
2851 pcie_root_port.clone(),
2852 msi_device_tube,
2853 0,
2854 sec_bus,
2855 ));
2856 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
2857 .context("Failed to configure pci bridge device")?;
2858 linux.hotplug_bus.push(pcie_root_port);
2859 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002860
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002861 #[cfg(feature = "direct")]
2862 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002863 let direct_io = Arc::new(
2864 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
2865 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002866 for range in pmio.ranges.iter() {
2867 linux
2868 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002869 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002870 .unwrap();
2871 }
2872 };
2873
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002874 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002875 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08002876 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002877 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08002878 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07002879 );
Xiong Zhang46471a02021-11-12 00:34:42 +08002880
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002881 for range in mmio.ranges.iter() {
2882 linux
2883 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002884 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002885 .unwrap();
2886 }
2887 };
2888
2889 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002890 let mut irqs = Vec::new();
2891
2892 #[cfg(feature = "direct")]
2893 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002894 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002895 warn!("irq {} already reserved.", irq);
2896 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002897 let trigger = Event::new().context("failed to create event")?;
2898 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002899 linux
2900 .irq_chip
2901 .register_irq_event(*irq, &trigger, Some(&resample))
2902 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07002903 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
2904 .context("failed to enable interrupt forwarding")?;
2905 direct_irq
2906 .irq_enable(*irq)
2907 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002908 irqs.push(direct_irq);
2909 }
2910
2911 #[cfg(feature = "direct")]
2912 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002913 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002914 warn!("irq {} already reserved.", irq);
2915 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002916 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002917 linux
2918 .irq_chip
2919 .register_irq_event(*irq, &trigger, None)
2920 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07002921 let direct_irq = devices::DirectIrq::new(trigger, None)
2922 .context("failed to enable interrupt forwarding")?;
2923 direct_irq
2924 .irq_enable(*irq)
2925 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002926 irqs.push(direct_irq);
2927 }
2928
Daniel Verkamp6b298582021-08-16 15:37:11 -07002929 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002930 run_control(
2931 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07002932 sys_allocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002933 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002934 control_tubes,
2935 balloon_host_tube,
2936 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002937 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002938 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002939 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002940 reset_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002941 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002942 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002943 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08002944 gralloc,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002945 cfg.per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002946 cfg.host_cpu_topology,
2947 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002948 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002949}
2950
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002951fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
2952 linux: &RunnableLinuxVm<V, Vcpu>,
2953 host_addr: PciAddress,
2954) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
2955 for hp_bus in linux.hotplug_bus.iter() {
2956 if let Some(number) = hp_bus.lock().is_match(host_addr) {
2957 return Ok((hp_bus.clone(), number));
2958 }
2959 }
2960 Err(anyhow!("Failed to find a suitable hotplug bus"))
2961}
2962
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002963#[allow(dead_code)]
2964fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2965 linux: &mut RunnableLinuxVm<V, Vcpu>,
2966 sys_allocator: &mut SystemAllocator,
2967 cfg: &Config,
2968 control_tubes: &mut Vec<TaggedControlTube>,
2969 vfio_path: &Path,
2970) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002971 let host_os_str = vfio_path
2972 .file_name()
2973 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2974 let host_str = host_os_str
2975 .to_str()
2976 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2977 let host_addr = PciAddress::from_string(host_str);
2978
2979 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
2980
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002981 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
2982 let (vfio_pci_device, jail) = create_vfio_device(
2983 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002984 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002985 sys_allocator,
2986 control_tubes,
2987 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002988 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002989 &mut endpoints,
2990 false,
2991 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002992
2993 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002994 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002995
Daniel Verkamp6b298582021-08-16 15:37:11 -07002996 let host_os_str = vfio_path
2997 .file_name()
2998 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2999 let host_str = host_os_str
3000 .to_str()
3001 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003002 let host_addr = PciAddress::from_string(host_str);
3003 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003004 let mut hp_bus = hp_bus.lock();
3005 hp_bus.add_hotplug_device(host_key, pci_address);
3006 hp_bus.hot_plug(pci_address);
3007 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003008}
3009
3010#[allow(dead_code)]
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003011fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3012 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08003013 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003014 vfio_path: &Path,
3015) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003016 let host_os_str = vfio_path
3017 .file_name()
3018 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3019 let host_str = host_os_str
3020 .to_str()
3021 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003022 let host_addr = PciAddress::from_string(host_str);
3023 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003024 for hp_bus in linux.hotplug_bus.iter() {
3025 let mut hp_bus_lock = hp_bus.lock();
3026 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
3027 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08003028 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003029 return Ok(());
3030 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003031 }
3032
Daniel Verkamp6b298582021-08-16 15:37:11 -07003033 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003034}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003035
Daniel Verkamp29409802021-02-24 14:46:19 -08003036/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
3037/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07003038/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
3039/// loop.
3040fn kick_all_vcpus(
3041 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07003042 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08003043 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07003044) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003045 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08003046 if let Err(e) = tube.send(message.clone()) {
3047 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07003048 }
3049 let _ = handle.kill(SIGRTMIN() + 0);
3050 }
3051 irq_chip.kick_halted_vcpus();
3052}
3053
Zach Reiznerdc748482021-04-14 13:59:30 -07003054fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3055 mut linux: RunnableLinuxVm<V, Vcpu>,
3056 mut sys_allocator: SystemAllocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08003057 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003058 mut control_tubes: Vec<TaggedControlTube>,
3059 balloon_host_tube: Tube,
3060 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07003061 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003062 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003063 reset_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07003064 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07003065 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003066 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08003067 mut gralloc: RutabagaGralloc,
Yusuke Sato31e136a2021-08-18 11:51:38 -07003068 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003069 host_cpu_topology: bool,
3070 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003071) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003072 #[derive(PollToken)]
3073 enum Token {
3074 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003075 Reset,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003076 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003077 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003078 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08003079 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003080 VmControl { index: usize },
3081 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003082
Zach Reizner19ad1f32019-12-12 18:58:50 -08003083 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003084 .set_raw_mode()
3085 .expect("failed to set terminal raw mode");
3086
Michael Hoylee392c462020-10-07 03:29:24 -07003087 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07003088 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003089 (&reset_evt, Token::Reset),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003090 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07003091 (&sigchld_fd, Token::ChildSignal),
3092 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07003093 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07003094
Zach Reiznera60744b2019-02-13 17:33:32 -08003095 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07003096 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003097 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003098 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003099 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003100 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07003101 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07003102 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003103 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003104 }
3105
Steven Richmanf32d0b42020-06-20 21:45:32 -07003106 let events = linux
3107 .irq_chip
3108 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07003109 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003110
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003111 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07003112 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003113 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003114 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003115 }
3116
Lepton Wu20333e42019-03-14 10:48:03 -07003117 if sandbox {
3118 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003119 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07003120 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08003121
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003122 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3123 // Create a channel for GDB thread.
3124 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
3125 let (s, r) = mpsc::channel();
3126 (Some(s), Some(r))
3127 } else {
3128 (None, None)
3129 };
3130
Steven Richmanf32d0b42020-06-20 21:45:32 -07003131 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
3132 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07003133 let use_hypervisor_signals = !linux
3134 .vm
3135 .get_hypervisor()
3136 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07003137 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003138
Zach Reizner304e7312020-09-29 16:00:24 -07003139 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00003140 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003141 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
3142 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07003143 // Enable core scheduling before creating vCPUs so that the cookie will be
3144 // shared by all vCPU threads.
3145 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
3146 // itself for even better performance. Only vCPUs need the feature.
3147 if enable_per_vm_core_scheduling {
3148 if let Err(e) = enable_core_scheduling() {
3149 error!("Failed to enable core scheduling: {}", e);
3150 }
3151 }
Daniel Verkamp94c35272019-09-12 13:31:30 -07003152 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07003153 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07003154 let vcpu_affinity = match linux.vcpu_affinity.clone() {
3155 Some(VcpuAffinity::Global(v)) => v,
3156 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
3157 None => Default::default(),
3158 };
Zach Reizner55a9e502018-10-03 10:22:32 -07003159 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07003160 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003161 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07003162 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003163 linux.vm.try_clone().context("failed to clone vm")?,
3164 linux
3165 .irq_chip
3166 .try_box_clone()
3167 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003168 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09003169 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07003170 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09003171 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09003172 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07003173 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003174 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07003175 (*linux.io_bus).clone(),
3176 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07003177 exit_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003178 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003179 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003180 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003181 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3182 to_gdb_channel.clone(),
Yusuke Sato31e136a2021-08-18 11:51:38 -07003183 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003184 host_cpu_topology,
Zach Reizner55a9e502018-10-03 10:22:32 -07003185 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003186 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003187 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003188
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003189 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3190 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003191 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003192 let to_vcpu_channels = vcpu_handles
3193 .iter()
3194 .map(|(_handle, channel)| channel.clone())
3195 .collect();
3196 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003197 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003198 to_vcpu_channels,
3199 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3200 );
3201 thread::Builder::new()
3202 .name("gdb".to_owned())
3203 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07003204 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003205 };
3206
Dylan Reid059a1882018-07-23 17:58:09 -07003207 vcpu_thread_barrier.wait();
3208
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003209 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09003210 let mut balloon_stats_id: u64 = 0;
3211
Michael Hoylee392c462020-10-07 03:29:24 -07003212 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003213 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003214 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003215 Ok(v) => v,
3216 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003217 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003218 break;
3219 }
3220 }
3221 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003222
Steven Richmanf32d0b42020-06-20 21:45:32 -07003223 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3224 warn!("can't deliver delayed irqs: {}", e);
3225 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003226
Zach Reiznera60744b2019-02-13 17:33:32 -08003227 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003228 for event in events.iter().filter(|e| e.is_readable) {
3229 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003230 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003231 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003232 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003233 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003234 Token::Reset => {
3235 info!("vcpu requested reset");
3236 exit_state = ExitState::Reset;
3237 break 'wait;
3238 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003239 Token::Suspend => {
3240 info!("VM requested suspend");
3241 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003242 kick_all_vcpus(
3243 &vcpu_handles,
3244 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003245 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003246 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003247 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003248 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003249 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003250 while let Some(siginfo) =
3251 sigchld_fd.read().context("failed to create signalfd")?
3252 {
Zach Reizner3ba00982019-01-23 19:04:43 -08003253 let pid = siginfo.ssi_pid;
3254 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3255 Some(label) => format!("{} (pid {})", label, pid),
3256 None => format!("pid {}", pid),
3257 };
David Tolnayf5032762018-12-03 10:46:45 -08003258 error!(
3259 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003260 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003261 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003262 }
Michael Hoylee392c462020-10-07 03:29:24 -07003263 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003264 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003265 Token::IrqFd { index } => {
3266 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3267 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003268 }
3269 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003270 Token::VmControlServer => {
3271 if let Some(socket_server) = &control_server_socket {
3272 match socket_server.accept() {
3273 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003274 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003275 .add(
3276 &socket,
3277 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003278 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003279 },
3280 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003281 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003282 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003283 }
3284 Err(e) => error!("failed to accept socket: {}", e),
3285 }
3286 }
3287 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003288 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003289 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003290 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003291 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003292 Ok(request) => {
3293 let mut run_mode_opt = None;
3294 let response = request.execute(
3295 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003296 &balloon_host_tube,
Charles William Dick54045012021-07-27 19:11:53 +09003297 &mut balloon_stats_id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003298 disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003299 #[cfg(feature = "usb")]
3300 Some(&usb_control_tube),
3301 #[cfg(not(feature = "usb"))]
3302 None,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08003303 &mut linux.bat_control,
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09003304 &vcpu_handles,
Jakub Starond99cd0a2019-04-11 14:09:39 -07003305 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003306 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003307 error!("failed to send VmResponse: {}", e);
3308 }
3309 if let Some(run_mode) = run_mode_opt {
3310 info!("control socket changed run mode to {}", run_mode);
3311 match run_mode {
3312 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003313 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003314 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003315 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003316 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07003317 for dev in &linux.resume_notify_devices {
3318 dev.lock().resume_imminent();
3319 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003320 }
Steven Richman11dc6712020-09-02 15:39:14 -07003321 kick_all_vcpus(
3322 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003323 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003324 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003325 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003326 }
3327 }
3328 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003329 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003330 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003331 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003332 vm_control_indices_to_remove.push(index);
3333 } else {
3334 error!("failed to recv VmRequest: {}", e);
3335 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003336 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003337 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003338 TaggedControlTube::VmMemory(tube) => {
3339 match tube.recv::<VmMemoryRequest>() {
3340 Ok(request) => {
3341 let response = request.execute(
3342 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003343 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003344 Arc::clone(&map_request),
3345 &mut gralloc,
3346 );
3347 if let Err(e) = tube.send(&response) {
3348 error!("failed to send VmMemoryControlResponse: {}", e);
3349 }
3350 }
3351 Err(e) => {
3352 if let TubeError::Disconnected = e {
3353 vm_control_indices_to_remove.push(index);
3354 } else {
3355 error!("failed to recv VmMemoryControlRequest: {}", e);
3356 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003357 }
3358 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003359 }
3360 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003361 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003362 let response = {
3363 let irq_chip = &mut linux.irq_chip;
3364 request.execute(
3365 |setup| match setup {
3366 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003367 if let Some(event_index) = irq_chip
3368 .register_irq_event(irq, ev, None)?
3369 {
3370 match wait_ctx.add(
3371 ev,
3372 Token::IrqFd {
3373 index: event_index
3374 },
3375 ) {
3376 Err(e) => {
3377 warn!("failed to add IrqFd to poll context: {}", e);
3378 Err(e)
3379 },
3380 Ok(_) => {
3381 Ok(())
3382 }
3383 }
3384 } else {
3385 Ok(())
3386 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003387 }
3388 IrqSetup::Route(route) => irq_chip.route_irq(route),
3389 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003390 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003391 )
3392 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003393 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003394 error!("failed to send VmIrqResponse: {}", e);
3395 }
3396 }
3397 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003398 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003399 vm_control_indices_to_remove.push(index);
3400 } else {
3401 error!("failed to recv VmIrqRequest: {}", e);
3402 }
3403 }
3404 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003405 TaggedControlTube::VmMsync(tube) => {
3406 match tube.recv::<VmMsyncRequest>() {
3407 Ok(request) => {
3408 let response = request.execute(&mut linux.vm);
3409 if let Err(e) = tube.send(&response) {
3410 error!("failed to send VmMsyncResponse: {}", e);
3411 }
3412 }
3413 Err(e) => {
3414 if let TubeError::Disconnected = e {
3415 vm_control_indices_to_remove.push(index);
3416 } else {
3417 error!("failed to recv VmMsyncRequest: {}", e);
3418 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003419 }
3420 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003421 }
3422 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003423 Ok(request) => {
3424 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003425 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003426 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003427 error!("failed to send VmResponse: {}", e);
3428 }
3429 }
3430 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003431 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003432 vm_control_indices_to_remove.push(index);
3433 } else {
3434 error!("failed to recv VmResponse: {}", e);
3435 }
3436 }
3437 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003438 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003439 }
3440 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003441 }
3442 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003443
Vikram Auradkarede68c72021-07-01 14:33:54 -07003444 // It's possible more data is readable and buffered while the socket is hungup,
3445 // so don't delete the tube from the poll context until we're sure all the
3446 // data is read.
3447 // Below case covers a condition where we have received a hungup event and the tube is not
3448 // readable.
3449 // In case of readable tube, once all data is read, any attempt to read more data on hungup
3450 // tube should fail. On such failure, we get Disconnected error and index gets added to
3451 // vm_control_indices_to_remove by the time we reach here.
3452 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3453 if let Token::VmControl { index } = event.token {
3454 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003455 }
3456 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003457
3458 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003459 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003460 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003461 vm_control_indices_to_remove.dedup();
3462 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003463 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3464 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003465 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003466 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003467 // that has already been closed. Because the token associated with that spurious event
3468 // now belongs to a different socket, the control loop will start to interact with
3469 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3470 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003471 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003472 wait_ctx
3473 .delete(socket)
3474 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08003475 }
3476
3477 // This line implicitly drops the socket at `index` when it gets returned by
3478 // `swap_remove`. After this line, the socket at `index` is not the one from
3479 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003480 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003481 control_tubes.swap_remove(index);
3482 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003483 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003484 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003485 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003486 }
3487 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003488 }
3489
Zach Reiznerdc748482021-04-14 13:59:30 -07003490 kick_all_vcpus(
3491 &vcpu_handles,
3492 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003493 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003494 );
Steven Richman11dc6712020-09-02 15:39:14 -07003495 for (handle, _) in vcpu_handles {
3496 if let Err(e) = handle.join() {
3497 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003498 }
3499 }
3500
Daniel Verkamp94c35272019-09-12 13:31:30 -07003501 // Explicitly drop the VM structure here to allow the devices to clean up before the
3502 // control sockets are closed when this function exits.
3503 mem::drop(linux);
3504
Zach Reizner19ad1f32019-12-12 18:58:50 -08003505 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003506 .set_canon_mode()
3507 .expect("failed to restore canonical mode for terminal");
3508
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003509 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08003510}