blob: 3d2bf40806e89f31a988ab7b79495627dcbef7b7 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Hikaru Nishida584e52c2021-04-27 17:37:08 +09005use std::cmp::Reverse;
Zide Chendfc4b882021-03-10 16:35:37 -08006use std::collections::BTreeMap;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090011use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Christian Blichmann50f95912021-11-05 16:59:39 +010015use std::os::unix::{io::FromRawFd, net::UnixStream, prelude::OpenOptionsExt};
Zach Reizner39aa26b2017-12-12 18:03:23 -080016use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070017use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Zach Reizner39aa26b2017-12-12 18:03:23 -080021use std::thread;
22use std::thread::JoinHandle;
23
Daniel Verkamp6b298582021-08-16 15:37:11 -070024use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
Tomasz Jeznach42644642020-05-20 23:27:59 -070026use acpi_tables::sdt::SDT;
27
Daniel Verkamp6b298582021-08-16 15:37:11 -070028use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090029use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090031use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070032use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090033#[cfg(feature = "audio_cras")]
34use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Woody Chow1b16db12021-04-02 16:59:59 +090035#[cfg(feature = "audio")]
36use devices::virtio::vhost::user::vmm::Snd as VhostUserSnd;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090037use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070038 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
Chirantan Ekbote84091e52021-09-10 18:43:17 +090039 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Vsock as VhostUserVsock,
40 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090041};
Alexandre Courbotb42b3e52021-07-09 23:38:57 +090042#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
43use devices::virtio::VideoBackendType;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070044use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090045#[cfg(feature = "gpu")]
46use devices::virtio::{
47 gpu::{DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
48 vhost::user::vmm::Gpu as VhostUserGpu,
49 EventDevice,
50};
paulhsiace17e6e2020-08-28 18:37:45 +080051#[cfg(feature = "audio")]
52use devices::Ac97Dev;
Will Deaconc48e7832021-07-30 19:03:06 +010053use devices::ProtectionType;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080054use devices::{
Xiong Zhangf82f2dc2021-05-21 16:54:12 +080055 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqChip, IrqEventIndex, KvmKernelIrqChip,
56 PciAddress, PciBridge, PciDevice, PcieRootPort, StubPciDevice, VcpuRunState, VfioContainer,
57 VfioDevice, VfioPciDevice, VfioPlatformDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080058};
Daniel Verkampf1439d42021-05-21 13:55:10 -070059#[cfg(feature = "usb")]
60use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070061use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Xiong Zhangdea7dbb2021-07-26 14:49:03 +080062use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070063use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070064use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080065use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080066use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070067use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080068use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090069use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080070
Keiichi Watanabec5262e92020-10-21 15:57:33 +090071#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
72use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090073use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000074 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
Christian Blichmann50f95912021-11-05 16:59:39 +010075 VhostUserFsOption, VhostUserOption, VhostUserWlOption, VhostVsockDeviceParameter,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090076};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070077use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090078 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070079};
Sonny Raoed517d12018-02-13 22:09:43 -080080
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080081#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070082use {
83 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070084 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070085 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
86};
Zach Reizner55a9e502018-10-03 10:22:32 -070087#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070088use {
Steven Richman11dc6712020-09-02 15:39:14 -070089 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
90 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070091 x86_64::X8664arch as Arch,
92};
Zach Reizner39aa26b2017-12-12 18:03:23 -080093
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080094enum TaggedControlTube {
95 Fs(Tube),
96 Vm(Tube),
97 VmMemory(Tube),
98 VmIrq(Tube),
99 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700100}
101
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800102impl AsRef<Tube> for TaggedControlTube {
103 fn as_ref(&self) -> &Tube {
104 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700105 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800106 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700107 }
108 }
109}
110
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800111impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700112 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800113 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700114 }
115}
116
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When set, the jail is created with an empty capability mask.
    limit_caps: bool,
    /// When set, seccomp failures are logged and the textual `.policy` file is always used
    /// instead of a precompiled `.bpf` one.
    log_failures: bool,
    /// Seccomp policy path; its extension is replaced with `bpf` or `policy` before loading.
    seccomp_policy: &'a Path,
    /// Optional UID map string handed to the jail.
    uid_map: Option<&'a str>,
    /// Optional GID map string handed to the jail.
    gid_map: Option<&'a str>,
}
124
Zach Reizner44863792019-06-26 14:22:08 -0700125fn create_base_minijail(
126 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800127 r_limit: Option<u64>,
128 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700129) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800130 // All child jails run in a new user namespace without any users mapped,
131 // they run as nobody unless otherwise configured.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700132 let mut j = Minijail::new().context("failed to jail device")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800133
134 if let Some(config) = config {
135 j.namespace_pids();
136 j.namespace_user();
137 j.namespace_user_disable_setgroups();
138 if config.limit_caps {
139 // Don't need any capabilities.
140 j.use_caps(0);
141 }
142 if let Some(uid_map) = config.uid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700143 j.uidmap(uid_map).context("error setting UID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800144 }
145 if let Some(gid_map) = config.gid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700146 j.gidmap(gid_map).context("error setting GID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800147 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900148 // Run in a new mount namespace.
149 j.namespace_vfs();
150
Matt Delcoc24ad782020-02-14 13:24:36 -0800151 // Run in an empty network namespace.
152 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900153
154 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800155 j.no_new_privs();
156
157 // By default we'll prioritize using the pre-compiled .bpf over the .policy
158 // file (the .bpf is expected to be compiled using "trap" as the failure
159 // behavior instead of the default "kill" behavior).
160 // Refer to the code comment for the "seccomp-log-failures"
161 // command-line parameter for an explanation about why the |log_failures|
162 // flag forces the use of .policy files (and the build-time alternative to
163 // this run-time flag).
164 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
165 if bpf_policy_file.exists() && !config.log_failures {
166 j.parse_seccomp_program(&bpf_policy_file)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700167 .context("failed to parse precompiled seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800168 } else {
169 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
170 // which will correctly kill the entire device process if a worker
171 // thread commits a seccomp violation.
172 j.set_seccomp_filter_tsync();
173 if config.log_failures {
174 j.log_seccomp_filter_failures();
175 }
176 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700177 .context("failed to parse seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800178 }
179 j.use_seccomp_filter();
180 // Don't do init setup.
181 j.run_as_init();
182 }
183
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900184 // Only pivot_root if we are not re-using the current root directory.
185 if root != Path::new("/") {
186 // It's safe to call `namespace_vfs` multiple times.
187 j.namespace_vfs();
Daniel Verkamp6b298582021-08-16 15:37:11 -0700188 j.enter_pivot_root(root)
189 .context("failed to pivot root device")?;
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900190 }
Matt Delco45caf912019-11-13 08:11:09 -0800191
Matt Delcoc24ad782020-02-14 13:24:36 -0800192 // Most devices don't need to open many fds.
193 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
194 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700195 .context("error setting max open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800196
Zach Reizner39aa26b2017-12-12 18:03:23 -0800197 Ok(j)
198}
199
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800200fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700201 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800202 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
203 // A directory for a jailed device's pivot root.
204 let root_path = Path::new(pivot_root);
205 if !root_path.exists() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700206 bail!("{} doesn't exist, can't jail devices", pivot_root);
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800207 }
208 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800209 let config = SandboxConfig {
210 limit_caps: true,
211 log_failures: cfg.seccomp_log_failures,
212 seccomp_policy: &policy_path,
213 uid_map: None,
214 gid_map: None,
215 };
216 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800217 } else {
218 Ok(None)
219 }
220}
221
Daniel Verkamp6b298582021-08-16 15:37:11 -0700222type DeviceResult<T = VirtioDeviceStub> = Result<T>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800223
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000224fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900225 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700226 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800227 // Lock the disk image to prevent other crosvm instances from using it.
228 let lock_op = if disk.read_only {
229 FlockOperation::LockShared
230 } else {
231 FlockOperation::LockExclusive
232 };
Daniel Verkamp6b298582021-08-16 15:37:11 -0700233 flock(&raw_image, lock_op, true).context("failed to lock disk image")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800234
Junichi Uekawa52437db2021-09-29 17:33:07 +0900235 info!("Trying to attach block device: {}", disk.path.display());
Daniel Verkamp6b298582021-08-16 15:37:11 -0700236 let dev = if disk::async_ok(&raw_image).context("failed to check disk async_ok")? {
237 let async_file = disk::create_async_disk_file(raw_image)
238 .context("failed to create async virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700239 Box::new(
240 virtio::BlockAsync::new(
241 virtio::base_features(cfg.protected_vm),
242 async_file,
243 disk.read_only,
244 disk.sparse,
245 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700246 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800247 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700248 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700249 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700250 ) as Box<dyn VirtioDevice>
251 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700252 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700253 .context("failed to create virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700254 Box::new(
255 virtio::Block::new(
256 virtio::base_features(cfg.protected_vm),
257 disk_file,
258 disk.read_only,
259 disk.sparse,
260 disk.block_size,
261 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800262 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700263 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700264 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700265 ) as Box<dyn VirtioDevice>
266 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800267
268 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700269 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700270 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800271 })
272}
273
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900274fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
275 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700276 .context("failed to set up vhost-user block device")?;
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900277
278 Ok(VirtioDeviceStub {
279 dev: Box::new(dev),
280 // no sandbox here because virtqueue handling is exported to a different process.
281 jail: None,
282 })
283}
284
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900285fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
286 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700287 .context("failed to set up vhost-user console device")?;
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900288
289 Ok(VirtioDeviceStub {
290 dev: Box::new(dev),
291 // no sandbox here because virtqueue handling is exported to a different process.
292 jail: None,
293 })
294}
295
Woody Chow5890b702021-02-12 14:57:02 +0900296fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
297 let dev = VhostUserFs::new(
298 virtio::base_features(cfg.protected_vm),
299 &option.socket,
300 &option.tag,
301 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700302 .context("failed to set up vhost-user fs device")?;
Woody Chow5890b702021-02-12 14:57:02 +0900303
304 Ok(VirtioDeviceStub {
305 dev: Box::new(dev),
306 // no sandbox here because virtqueue handling is exported to a different process.
307 jail: None,
308 })
309}
310
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900311fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
312 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700313 .context("failed to set up vhost-user mac80211_hwsim device")?;
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900314
315 Ok(VirtioDeviceStub {
316 dev: Box::new(dev),
317 // no sandbox here because virtqueue handling is exported to a different process.
318 jail: None,
319 })
320}
321
/// Creates the VMM side of a vhost-user sound device connected to `option.socket`.
///
/// The device is never jailed. Returns an error if the device cannot be set up.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(cfg: &Config, option: &VhostUserOption) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let snd = VhostUserSnd::new(features, &option.socket)
        .context("failed to set up vhost-user snd device")?;

    // no sandbox here because virtqueue handling is exported to a different process.
    let jail = None;
    Ok(VirtioDeviceStub {
        dev: Box::new(snd),
        jail,
    })
}
333
David Tolnay2b089fc2019-03-04 15:33:22 -0800334fn create_rng_device(cfg: &Config) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700335 let dev = virtio::Rng::new(virtio::base_features(cfg.protected_vm))
336 .context("failed to set up rng")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800337
338 Ok(VirtioDeviceStub {
339 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700340 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800341 })
342}
343
/// Creates a virtio sound device backed by CRAS, configured by `cras_snd`.
///
/// When sandboxing is enabled the jail gets a small tmpfs root, a bind mount of `/run/cras`,
/// and the current user added to it.
///
/// Returns an error if the device or jail cannot be created, or if either mount or the user
/// mapping cannot be configured.
#[cfg(feature = "audio_cras")]
fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
    let dev = virtio::snd::cras_backend::VirtioSndCras::new(
        virtio::base_features(cfg.protected_vm),
        cras_snd,
    )
    .context("failed to create cras sound device")?;

    // `cfg` is already a reference; pass it through as the sibling constructors do.
    let mut jail = simple_jail(cfg, "cras_snd_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory for cras_snd_device.
        // The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )
        .context("failed to mount tmpfs for cras sound device")?;

        let run_cras_path = Path::new("/run/cras");
        jail.mount_bind(run_cras_path, run_cras_path, true)
            .context("failed to bind mount /run/cras for cras sound device")?;

        add_current_user_to_jail(jail)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
379
/// Creates a virtio TPM device and the storage directory it uses.
///
/// With sandboxing enabled, the jail gets a small tmpfs root and the current user added to
/// it; a per-process directory `/run/vm/tpm.<pid>` is created, chowned to the ids returned
/// for the current user, and bind-mounted into the jail. Without sandboxing, the storage
/// path is `/tmp/tpm-simulator`.
///
/// Returns an error if the jail, mounts, directory creation or chown fail.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )?;

            let crosvm_ids = add_current_user_to_jail(jail)?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage).with_context(|| {
                format!("failed to create tpm storage dir {}", storage.display())
            })?;
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .context("failed to chown tpm storage")?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
428
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700429fn create_single_touch_device(
430 cfg: &Config,
431 single_touch_spec: &TouchDeviceOption,
432 idx: u32,
433) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800434 let socket = single_touch_spec
435 .get_path()
436 .into_unix_stream()
437 .map_err(|e| {
438 error!("failed configuring virtio single touch: {:?}", e);
439 e
440 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800441
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800442 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700443 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700444 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700445 socket,
446 width,
447 height,
448 virtio::base_features(cfg.protected_vm),
449 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700450 .context("failed to set up input device")?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800451 Ok(VirtioDeviceStub {
452 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700453 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800454 })
455}
456
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700457fn create_multi_touch_device(
458 cfg: &Config,
459 multi_touch_spec: &TouchDeviceOption,
460 idx: u32,
461) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000462 let socket = multi_touch_spec
463 .get_path()
464 .into_unix_stream()
465 .map_err(|e| {
466 error!("failed configuring virtio multi touch: {:?}", e);
467 e
468 })?;
469
470 let (width, height) = multi_touch_spec.get_size();
471 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700472 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000473 socket,
474 width,
475 height,
476 virtio::base_features(cfg.protected_vm),
477 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700478 .context("failed to set up input device")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000479
480 Ok(VirtioDeviceStub {
481 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700482 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000483 })
484}
485
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700486fn create_trackpad_device(
487 cfg: &Config,
488 trackpad_spec: &TouchDeviceOption,
489 idx: u32,
490) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800491 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000492 error!("failed configuring virtio trackpad: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800493 e
494 })?;
495
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800496 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700497 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700498 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700499 socket,
500 width,
501 height,
502 virtio::base_features(cfg.protected_vm),
503 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700504 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800505
506 Ok(VirtioDeviceStub {
507 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700508 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800509 })
510}
511
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700512fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800513 let socket = mouse_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000514 error!("failed configuring virtio mouse: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800515 e
516 })?;
517
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700518 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700519 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800520
521 Ok(VirtioDeviceStub {
522 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700523 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800524 })
525}
526
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700527fn create_keyboard_device<T: IntoUnixStream>(
528 cfg: &Config,
529 keyboard_socket: T,
530 idx: u32,
531) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800532 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000533 error!("failed configuring virtio keyboard: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800534 e
535 })?;
536
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700537 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700538 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800539
540 Ok(VirtioDeviceStub {
541 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700542 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800543 })
544}
545
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700546fn create_switches_device<T: IntoUnixStream>(
547 cfg: &Config,
548 switches_socket: T,
549 idx: u32,
550) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800551 let socket = switches_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000552 error!("failed configuring virtio switches: {:#}", e);
Daniel Norman5e23df72021-03-11 10:11:02 -0800553 e
554 })?;
555
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700556 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700557 .context("failed to set up input device")?;
Daniel Norman5e23df72021-03-11 10:11:02 -0800558
559 Ok(VirtioDeviceStub {
560 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700561 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800562 })
563}
564
David Tolnay2b089fc2019-03-04 15:33:22 -0800565fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
566 let dev_file = OpenOptions::new()
567 .read(true)
568 .write(true)
569 .open(dev_path)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700570 .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800571
Noah Goldd4ca29b2020-10-27 12:21:52 -0700572 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700573 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800574
575 Ok(VirtioDeviceStub {
576 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700577 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800578 })
579}
580
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800581fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
582 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700583 .context("failed to create balloon")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800584
585 Ok(VirtioDeviceStub {
586 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700587 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800588 })
589}
590
Michael Hoylea596a072020-11-10 19:32:45 -0800591fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800592 // Safe because we ensure that we get a unique handle to the fd.
593 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800594 Tap::from_raw_descriptor(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700595 validate_raw_descriptor(tap_fd).context("failed to validate tap descriptor")?,
Michael Hoylea596a072020-11-10 19:32:45 -0800596 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700597 .context("failed to create tap device")?
David Tolnay2b089fc2019-03-04 15:33:22 -0800598 };
599
Xiong Zhang773c7072020-03-20 10:39:55 +0800600 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
601 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700602 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800603 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
604 vq_pairs = 1;
605 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100606 let features = virtio::base_features(cfg.protected_vm);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700607 let dev =
Alexandre Courbot00065862021-12-08 13:23:54 +0900608 virtio::Net::from(features, tap, vq_pairs).context("failed to create tap net device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800609
610 Ok(VirtioDeviceStub {
611 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700612 jail: simple_jail(cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800613 })
614}
615
616fn create_net_device(
617 cfg: &Config,
618 host_ip: Ipv4Addr,
619 netmask: Ipv4Addr,
620 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800621) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800622 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
623 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700624 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800625 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
626 vq_pairs = 1;
627 }
628
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100629 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800630 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100631 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100632 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100633 features,
634 host_ip,
635 netmask,
636 mac_address,
Will Deacon81d5adb2020-10-06 18:37:48 +0100637 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700638 .context("failed to set up vhost networking")?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800639 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800640 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100641 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Alexandre Courbot00065862021-12-08 13:23:54 +0900642 .context("failed to create virtio network device")?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800643 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800644 };
645
646 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800647 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800648 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800649 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800650 };
651
652 Ok(VirtioDeviceStub {
653 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700654 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800655 })
656}
657
Keiichi Watanabe60686582021-03-12 04:53:51 +0900658fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
659 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700660 .context("failed to set up vhost-user net device")?;
Keiichi Watanabe60686582021-03-12 04:53:51 +0900661
662 Ok(VirtioDeviceStub {
663 dev: Box::new(dev),
664 // no sandbox here because virtqueue handling is exported to a different process.
665 jail: None,
666 })
667}
668
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900669fn create_vhost_user_vsock_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
670 let dev = VhostUserVsock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700671 .context("failed to set up vhost-user vsock device")?;
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900672
673 Ok(VirtioDeviceStub {
674 dev: Box::new(dev),
675 // no sandbox here because virtqueue handling is exported to a different process.
676 jail: None,
677 })
678}
679
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900680fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
681 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
682 // connection.
683 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700684 .context("failed to set up vhost-user wl device")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900685
686 Ok(VirtioDeviceStub {
687 dev: Box::new(dev),
688 // no sandbox here because virtqueue handling is exported to a different process.
689 jail: None,
690 })
691}
692
/// Creates a vhost-user gpu device that connects to the backend at `opt.socket`.
///
/// `host_tube` and `device_tube` are handed to `VhostUserGpu::new` unchanged.
///
/// Returns an error if `VhostUserGpu::new` fails.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);

    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .context("failed to set up vhost-user gpu device")?;

    // Virtqueue handling is exported to a different process, so no sandbox is created here.
    let stub = VirtioDeviceStub {
        dev: Box::new(gpu),
        jail: None,
    };
    Ok(stub)
}
716
/// Creates the virtio gpu device and, when sandboxing yields a jail, populates that jail with
/// the mounts the device needs.
///
/// Display backends are tried in the order Wayland (only when `wayland_socket_path` is given),
/// X, then the stub backend.
///
/// # Errors
///
/// Returns an error if `cfg.gpu_parameters` is unset, if a configured wayland socket path has no
/// parent directory, if `exit_evt` cannot be cloned, or if any jail setup step fails.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    // Report a missing gpu configuration as an error instead of panicking.
    let gpu_parameters = cfg
        .gpu_parameters
        .as_ref()
        .ok_or_else(|| anyhow!("gpu device requested without gpu parameters"))?;

    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, path)| path.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;

    // Wayland, when available, takes priority over the other backends.
    if let Some(socket_path) = wayland_socket_path {
        display_backends.insert(
            0,
            virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
        );
    }

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().context("failed to clone event")?,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        gpu_parameters,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = match simple_jail(cfg, "gpu_device")? {
        Some(mut jail) => {
            // Create a tmpfs in the device's root directory so that we can bind mount the
            // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=67108864",
            )?;

            // Device nodes required for DRM.
            let sys_dev_char_path = Path::new("/sys/dev/char");
            jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
            let sys_devices_path = Path::new("/sys/devices");
            jail.mount_bind(sys_devices_path, sys_devices_path, false)?;

            let drm_dri_path = Path::new("/dev/dri");
            if drm_dri_path.exists() {
                jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
            }

            // Prepare GPU shader disk cache directory.
            if let Some(cache_dir) = gpu_parameters.cache_path.as_ref() {
                if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
                    warn!("shader caching not yet supported on ARM with sandbox enabled");
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
                } else {
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                    env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                    if let Some(cache_size) = gpu_parameters.cache_size.as_ref() {
                        env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                    }
                    let shadercache_path = Path::new(cache_dir);
                    jail.mount_bind(shadercache_path, shadercache_path, true)?;
                }
            }

            // If the ARM specific devices exist on the host, bind mount them in.
            let mali0_path = Path::new("/dev/mali0");
            if mali0_path.exists() {
                jail.mount_bind(mali0_path, mali0_path, true)?;
            }

            let pvr_sync_path = Path::new("/dev/pvr_sync");
            if pvr_sync_path.exists() {
                jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
            }

            // If the udmabuf driver exists on the host, bind mount it in.
            let udmabuf_path = Path::new("/dev/udmabuf");
            if udmabuf_path.exists() {
                jail.mount_bind(udmabuf_path, udmabuf_path, true)?;
            }

            // Libraries that are required when mesa drivers are dynamically loaded.
            let lib_dirs = &[
                "/usr/lib",
                "/usr/lib64",
                "/lib",
                "/lib64",
                "/usr/share/glvnd",
                "/usr/share/vulkan",
            ];
            for dir in lib_dirs {
                let dir_path = Path::new(dir);
                if dir_path.exists() {
                    jail.mount_bind(dir_path, dir_path, false)?;
                }
            }

            // Bind mount the wayland socket's directory into jail's root. This is necessary since
            // each new wayland context must open() the socket. If the wayland socket is ever
            // destroyed and remade in the same host directory, new connections will be possible
            // without restarting the wayland device.
            for dir in &wayland_socket_dirs {
                jail.mount_bind(dir, dir, true)?;
            }

            add_current_user_to_jail(&mut jail)?;

            // pvr driver requires read access to /proc/self/task/*/comm.
            let proc_path = Path::new("/proc");
            jail.mount(
                proc_path,
                proc_path,
                "proc",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
            )?;

            // To enable perfetto tracing, we need to give access to the perfetto service IPC
            // endpoints.
            let perfetto_path = Path::new("/run/perfetto");
            if perfetto_path.exists() {
                jail.mount_bind(perfetto_path, perfetto_path, true)?;
            }

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
876
877fn create_wayland_device(
878 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800879 control_tube: Tube,
880 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800881) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900882 let wayland_socket_dirs = cfg
883 .wayland_socket_paths
884 .iter()
885 .map(|(_name, path)| path.parent())
886 .collect::<Option<Vec<_>>>()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700887 .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800888
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100889 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100890 let dev = virtio::Wl::new(
891 features,
892 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800893 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +0100894 resource_bridge,
895 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700896 .context("failed to create wayland device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800897
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700898 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800899 Some(mut jail) => {
900 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
901 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
902 jail.mount_with_data(
903 Path::new("none"),
904 Path::new("/"),
905 "tmpfs",
906 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
907 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800908 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800909
910 // Bind mount the wayland socket's directory into jail's root. This is necessary since
911 // each new wayland context must open() the socket. If the wayland socket is ever
912 // destroyed and remade in the same host directory, new connections will be possible
913 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900914 for dir in &wayland_socket_dirs {
915 jail.mount_bind(dir, dir, true)?;
916 }
Fergus Dall51200512021-08-19 12:54:26 +1000917 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800918
919 Some(jail)
920 }
921 None => None,
922 };
923
924 Ok(VirtioDeviceStub {
925 dev: Box::new(dev),
926 jail,
927 })
928}
929
/// Creates a virtio video device of kind `typ` using `backend`, together with its jail.
///
/// When sandboxing yields a jail, the current user is added to it and the device nodes,
/// libraries and sockets listed below are bind mounted into a fresh tmpfs root.
///
/// Returns an error if any jail setup step fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    backend: VideoBackendType,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;

    if let Some(jail) = jail.as_mut() {
        // Decoder and encoder devices both run as the current user inside the jail.
        add_current_user_to_jail(jail)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        #[cfg(feature = "libvda")]
        if backend == VideoBackendType::Libvda {
            // follow the implementation at:
            // https://source.corp.google.com/chromeos_public/src/platform/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=90;bpv=0;cl=c06cc9cccb3cf3c7f9d2aec706c27c34cd6162a0
            const DRM_NUM_NODES: u32 = 63;
            const DRM_RENDER_NODE_START: u32 = 128;
            for node in DRM_RENDER_NODE_START..DRM_RENDER_NODE_START + DRM_NUM_NODES {
                let render_node = PathBuf::from(format!("/dev/dri/renderD{}", node));
                // Stop at the first render node that is missing on the host.
                if !render_node.exists() {
                    break;
                }
                jail.mount_bind(&render_node, &render_node, false)?;
            }
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices, followed by
            // the directory required for loading dri libraries loaded by minigbm on AMD devices.
            for dir in ["/sys/dev/char", "/sys/devices", "/usr/lib64"].iter() {
                let dir_path = Path::new(dir);
                jail.mount_bind(dir_path, dir_path, false)?;
            }
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let dbus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(dbus_socket, dbus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        backend,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
1006
/// Creates a video device of kind `typ` and appends it to `devs`.
///
/// `devs` is left untouched if device creation fails; the error is returned to the caller.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    backend: VideoBackendType,
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> Result<()> {
    let video_stub = create_video_device(backend, cfg, typ, video_tube)?;
    devs.push(video_stub);
    Ok(())
}
1018
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001019fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001020 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann50f95912021-11-05 16:59:39 +01001021
1022 let device_file = match cfg
1023 .vhost_vsock_device
1024 .as_ref()
1025 .unwrap_or(&VhostVsockDeviceParameter::default())
1026 {
1027 VhostVsockDeviceParameter::Fd(fd) => {
1028 let fd = validate_raw_descriptor(*fd)
1029 .context("failed to validate fd for virtual socker device")?;
1030 // Safe because the `fd` is actually owned by this process and
1031 // we have a unique handle to it.
1032 unsafe { File::from_raw_fd(fd) }
1033 }
1034 VhostVsockDeviceParameter::Path(path) => OpenOptions::new()
1035 .read(true)
1036 .write(true)
1037 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
1038 .open(path)
1039 .context("failed to open virtual socket device")?,
1040 };
1041
1042 let dev = virtio::vhost::Vsock::new(device_file, features, cid)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001043 .context("failed to set up virtual socket device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001044
1045 Ok(VirtioDeviceStub {
1046 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001047 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001048 })
1049}
1050
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001051fn create_fs_device(
1052 cfg: &Config,
1053 uid_map: &str,
1054 gid_map: &str,
1055 src: &Path,
1056 tag: &str,
1057 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001058 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001059) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001060 let max_open_files =
1061 base::get_max_open_files().context("failed to get max number of open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001062 let j = if cfg.sandbox {
1063 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1064 let config = SandboxConfig {
1065 limit_caps: false,
1066 uid_map: Some(uid_map),
1067 gid_map: Some(gid_map),
1068 log_failures: cfg.seccomp_log_failures,
1069 seccomp_policy: &seccomp_policy,
1070 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001071 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1072 // We want bind mounts from the parent namespaces to propagate into the fs device's
1073 // namespace.
1074 jail.set_remount_mode(libc::MS_SLAVE);
1075
1076 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001077 } else {
1078 create_base_minijail(src, Some(max_open_files), None)?
1079 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001080
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001081 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001082 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1083 // when num_queues > 1.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001084 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1085 .context("failed to create fs device")?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001086
1087 Ok(VirtioDeviceStub {
1088 dev: Box::new(dev),
1089 jail: Some(j),
1090 })
1091}
1092
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001093fn create_9p_device(
1094 cfg: &Config,
1095 uid_map: &str,
1096 gid_map: &str,
1097 src: &Path,
1098 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001099 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001100) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001101 let max_open_files =
1102 base::get_max_open_files().context("failed to get max number of open files")?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001103 let (jail, root) = if cfg.sandbox {
1104 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1105 let config = SandboxConfig {
1106 limit_caps: false,
1107 uid_map: Some(uid_map),
1108 gid_map: Some(gid_map),
1109 log_failures: cfg.seccomp_log_failures,
1110 seccomp_policy: &seccomp_policy,
1111 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001112
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001113 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1114 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1115 // namespace.
1116 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001117
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001118 // The shared directory becomes the root of the device's file system.
1119 let root = Path::new("/");
1120 (Some(jail), root)
1121 } else {
1122 // There's no mount namespace so we tell the server to treat the source directory as the
1123 // root.
1124 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001125 };
1126
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001127 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001128 p9_cfg.root = root.into();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001129 let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001130
1131 Ok(VirtioDeviceStub {
1132 dev: Box::new(dev),
1133 jail,
1134 })
1135}
1136
Jakub Starona3411ea2019-04-24 10:55:25 -07001137fn create_pmem_device(
1138 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001139 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001140 resources: &mut SystemAllocator,
1141 disk: &DiskOption,
1142 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001143 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001144) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001145 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001146 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001147
1148 let (disk_size, arena_size) = {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001149 let metadata = std::fs::metadata(&disk.path).with_context(|| {
1150 format!("failed to get disk image {} metadata", disk.path.display())
1151 })?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001152 let disk_len = metadata.len();
1153 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1154 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1155 // we just align the size of the file to 2 MiB then access beyond the last page of the
1156 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1157 // padding up to 2 MiB.
1158 let alignment = 2 * 1024 * 1024;
1159 let align_adjust = if disk_len % alignment != 0 {
1160 alignment - (disk_len % alignment)
1161 } else {
1162 0
1163 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001164 (
1165 disk_len,
1166 disk_len
1167 .checked_add(align_adjust)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001168 .ok_or_else(|| anyhow!("pmem device image too big"))?,
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001169 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001170 };
1171
1172 let protection = {
1173 if disk.read_only {
1174 Protection::read()
1175 } else {
1176 Protection::read_write()
1177 }
1178 };
1179
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001180 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001181 // Conversion from u64 to usize may fail on 32bit system.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001182 let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1183 let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001184
Daniel Verkamp6b298582021-08-16 15:37:11 -07001185 let mut arena =
1186 MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001187 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001188 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001189 .context("failed to reserve pmem memory")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001190
1191 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1192 // of the page with zeroes. However, the anonymous mapping added below must start on a
1193 // page boundary, so round up the size before calculating the offset of the anon region.
1194 let disk_size = round_up_to_page_size(disk_size);
1195
1196 if arena_size > disk_size {
1197 // Add an anonymous region with the same protection as the disk mapping if the arena
1198 // size was aligned.
1199 arena
1200 .add_anon_protection(disk_size, arena_size - disk_size, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001201 .context("failed to reserve pmem padding")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001202 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001203 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001204 };
1205
1206 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001207 .mmio_allocator(MmioType::High)
Daniel Verkamp57e4f542021-10-28 09:56:40 -07001208 .reverse_allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001209 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001210 Alloc::PmemDevice(index),
1211 format!("pmem_disk_image_{}", index),
1212 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1213 128 * 1024 * 1024, /* 128 MiB */
1214 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001215 .context("failed to allocate memory for pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001216
Daniel Verkampe1980a92020-02-07 11:00:55 -08001217 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001218 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001219 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001220 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001221 /* read_only = */ disk.read_only,
1222 /* log_dirty_pages = */ false,
1223 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001224 .context("failed to add pmem device memory")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001225
Daniel Verkampe1980a92020-02-07 11:00:55 -08001226 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001227 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001228 fd,
1229 GuestAddress(mapping_address),
1230 slot,
1231 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001232 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001233 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001234 .context("failed to create pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001235
1236 Ok(VirtioDeviceStub {
1237 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001238 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001239 })
1240}
1241
Zide Chendfc4b882021-03-10 16:35:37 -08001242fn create_iommu_device(
1243 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001244 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001245 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1246) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001247 let dev = virtio::Iommu::new(
1248 virtio::base_features(cfg.protected_vm),
1249 endpoints,
1250 phys_max_addr,
1251 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001252 .context("failed to create IOMMU device")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001253
1254 Ok(VirtioDeviceStub {
1255 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001256 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001257 })
1258}
1259
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001260fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001261 let mut keep_rds = Vec::new();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001262 let evt = Event::new().context("failed to create event")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001263 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001264 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001265 .context("failed to create console device")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001266
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001267 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001268 Some(mut jail) => {
1269 // Create a tmpfs in the device's root directory so that we can bind mount the
1270 // log socket directory into it.
1271 // The size=67108864 is size=64*1024*1024 or size=64MB.
1272 jail.mount_with_data(
1273 Path::new("none"),
1274 Path::new("/"),
1275 "tmpfs",
1276 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1277 "size=67108864",
1278 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001279 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001280 let res = param.add_bind_mounts(&mut jail);
1281 if res.is_err() {
1282 error!("failed to add bind mounts for console device");
1283 }
1284 Some(jail)
1285 }
1286 None => None,
1287 };
1288
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001289 Ok(VirtioDeviceStub {
1290 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001291 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001292 })
1293}
1294
/// Builds a virtio sound device stub from `path`, jailed with the "vios_audio_device" policy
/// when `simple_jail` provides a jail.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).context("failed to create sound device")?;
    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1305
/// Creates the virtio device stubs requested by `cfg`, in a fixed order.
///
/// Each stub pairs a boxed device with its optional jail. The order in which stubs are pushed
/// onto the returned vector is the order of the sections below.
///
/// Tube arguments are consumed as the matching devices are created:
/// * `disk_device_tubes`, `pmem_device_tubes` and `fs_device_tubes` are drained from the front,
///   one tube per disk / pmem device / `SharedDirKind::FS` shared directory. `Vec::remove(0)`
///   panics on an empty vector, so the caller must supply at least that many tubes.
/// * `vhost_user_gpu_tubes` is zipped with `cfg.vhost_user_gpu`; surplus entries on either side
///   are ignored by `zip`.
///
/// Returns an error as soon as any individual device fails to be created.
// gpu_device_tube is not used when GPU support is disabled.
#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
fn create_virtio_devices(
    cfg: &Config,
    vm: &mut impl Vm,
    resources: &mut SystemAllocator,
    _exit_evt: &Event,
    wayland_device_tube: Tube,
    gpu_device_tube: Tube,
    vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
    balloon_device_tube: Tube,
    disk_device_tubes: &mut Vec<Tube>,
    pmem_device_tubes: &mut Vec<Tube>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    fs_device_tubes: &mut Vec<Tube>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // One console device per serial parameter configured as a virtio console.
    for (_, param) in cfg
        .serial_parameters
        .iter()
        .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
    {
        let dev = create_console_device(cfg, param)?;
        devs.push(dev);
    }

    // Block devices; each takes the first remaining disk tube.
    for disk in &cfg.disks {
        let disk_device_tube = disk_device_tubes.remove(0);
        devs.push(create_block_device(cfg, disk, disk_device_tube)?);
    }

    for blk in &cfg.vhost_user_blk {
        devs.push(create_vhost_user_block_device(cfg, blk)?);
    }

    for console in &cfg.vhost_user_console {
        devs.push(create_vhost_user_console_device(cfg, console)?);
    }

    // Pmem devices; each takes the first remaining pmem tube and its position in the config.
    for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
        let pmem_device_tube = pmem_device_tubes.remove(0);
        devs.push(create_pmem_device(
            cfg,
            vm,
            resources,
            pmem_disk,
            index,
            pmem_device_tube,
        )?);
    }

    // The rng device is created unconditionally.
    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "audio_cras")]
    {
        if let Some(cras_snd) = &cfg.cras_snd {
            devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
        }
    }

    #[cfg(feature = "tpm")]
    {
        if cfg.software_tpm {
            devs.push(create_tpm_device(cfg)?);
        }
    }

    // Input devices. Each kind is numbered from 0 by its position in the config list.
    for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
        devs.push(create_single_touch_device(
            cfg,
            single_touch_spec,
            idx as u32,
        )?);
    }

    for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
        devs.push(create_multi_touch_device(
            cfg,
            multi_touch_spec,
            idx as u32,
        )?);
    }

    for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
        devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
    }

    for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
        devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
    }

    for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
        devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
    }

    for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
        devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
    }

    for dev_path in &cfg.virtio_input_evdevs {
        devs.push(create_vinput_device(cfg, dev_path)?);
    }

    // The balloon device is created unconditionally.
    devs.push(create_balloon_device(cfg, balloon_device_tube)?);

    // We checked above that if the IP is defined, then the netmask is, too.
    // NOTE(review): that check is not in this function; presumably it happens during
    // configuration validation -- confirm.
    for tap_fd in &cfg.tap_fd {
        devs.push(create_tap_net_device(cfg, *tap_fd)?);
    }

    // A net device is created only when host IP, netmask and MAC address are all set, and
    // that combination is rejected if any vhost-user-net device is configured.
    if let (Some(host_ip), Some(netmask), Some(mac_address)) =
        (cfg.host_ip, cfg.netmask, cfg.mac_address)
    {
        if !cfg.vhost_user_net.is_empty() {
            bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
        }
        devs.push(create_net_device(cfg, host_ip, netmask, mac_address)?);
    }

    for net in &cfg.vhost_user_net {
        devs.push(create_vhost_user_net_device(cfg, net)?);
    }

    for vsock in &cfg.vhost_user_vsock {
        devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
    }

    for opt in &cfg.vhost_user_wl {
        devs.push(create_vhost_user_wl_device(cfg, opt)?);
    }

    #[cfg(feature = "gpu")]
    for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
        devs.push(create_vhost_user_gpu_device(
            cfg,
            opt,
            host_tube,
            device_tube,
        )?);
    }

    // GPU-side ends of the tube pairs created below for wayland and the video devices; the
    // whole vector is handed to `create_gpu_device`.
    #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
    let mut resource_bridges = Vec::<Tube>::new();

    if !cfg.wayland_socket_paths.is_empty() {
        #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
        let mut wl_resource_bridge = None::<Tube>;

        #[cfg(feature = "gpu")]
        {
            // Only bridge wayland to the GPU when a GPU is actually configured.
            if cfg.gpu_parameters.is_some() {
                let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
                resource_bridges.push(gpu_socket);
                wl_resource_bridge = Some(wl_socket);
            }
        }

        devs.push(create_wayland_device(
            cfg,
            wayland_device_tube,
            wl_resource_bridge,
        )?);
    }

    // The video tube pairs are created before the GPU device so that their GPU ends are
    // already in `resource_bridges` when it is moved into `create_gpu_device`; the video
    // devices themselves are registered after the GPU device, further below.
    #[cfg(feature = "video-decoder")]
    let video_dec_cfg = if let Some(backend) = cfg.video_dec {
        let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
        resource_bridges.push(gpu_tube);
        Some((video_tube, backend))
    } else {
        None
    };

    #[cfg(feature = "video-encoder")]
    let video_enc_cfg = if let Some(backend) = cfg.video_enc {
        let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
        resource_bridges.push(gpu_tube);
        Some((video_tube, backend))
    } else {
        None
    };

    #[cfg(feature = "gpu")]
    {
        if let Some(gpu_parameters) = &cfg.gpu_parameters {
            // Default display size, overridden by the first configured display if there is one.
            let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
            let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
            if !gpu_parameters.displays.is_empty() {
                gpu_display_w = gpu_parameters.displays[0].width;
                gpu_display_h = gpu_parameters.displays[0].height;
            }

            // Input devices fed by the display window. Each is one end of a socket pair: the
            // virtio device gets one end, the GPU device gets the other as an `EventDevice`.
            let mut event_devices = Vec::new();
            if cfg.display_window_mouse {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().context("failed to create socket")?;
                // Use the size of the first configured multi-touch device, falling back to
                // the display size.
                let (multi_touch_width, multi_touch_height) = cfg
                    .virtio_multi_touch
                    .first()
                    .as_ref()
                    .map(|multi_touch_spec| multi_touch_spec.get_size())
                    .unwrap_or((gpu_display_w, gpu_display_h));
                let dev = virtio::new_multi_touch(
                    // u32::MAX is the least likely to collide with the indices generated above for
                    // the multi_touch options, which begin at 0.
                    u32::MAX,
                    virtio_dev_socket,
                    multi_touch_width,
                    multi_touch_height,
                    virtio::base_features(cfg.protected_vm),
                )
                .context("failed to set up mouse device")?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(cfg, "input_device")?,
                });
                event_devices.push(EventDevice::touchscreen(event_device_socket));
            }
            if cfg.display_window_keyboard {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().context("failed to create socket")?;
                let dev = virtio::new_keyboard(
                    // u32::MAX is the least likely to collide with the indices generated above for
                    // the keyboard options, which begin at 0.
                    u32::MAX,
                    virtio_dev_socket,
                    virtio::base_features(cfg.protected_vm),
                )
                .context("failed to set up keyboard device")?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(cfg, "input_device")?,
                });
                event_devices.push(EventDevice::keyboard(event_device_socket));
            }
            devs.push(create_gpu_device(
                cfg,
                _exit_evt,
                gpu_device_tube,
                resource_bridges,
                // Use the unnamed socket for GPU display screens.
                cfg.wayland_socket_paths.get(""),
                cfg.x_display.clone(),
                event_devices,
                map_request,
            )?);
        }
    }

    #[cfg(feature = "video-decoder")]
    {
        if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
            register_video_device(
                video_dec_backend,
                &mut devs,
                video_dec_tube,
                cfg,
                devices::virtio::VideoDeviceType::Decoder,
            )?;
        }
    }

    #[cfg(feature = "video-encoder")]
    {
        if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
            register_video_device(
                video_enc_backend,
                &mut devs,
                video_enc_tube,
                cfg,
                devices::virtio::VideoDeviceType::Encoder,
            )?;
        }
    }

    if let Some(cid) = cfg.cid {
        devs.push(create_vhost_vsock_device(cfg, cid)?);
    }

    for vhost_user_fs in &cfg.vhost_user_fs {
        devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
    }

    #[cfg(feature = "audio")]
    for vhost_user_snd in &cfg.vhost_user_snd {
        devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
    }

    // Shared directories are exported as either a virtio-fs or a 9p device.
    for shared_dir in &cfg.shared_dirs {
        let SharedDir {
            src,
            tag,
            kind,
            uid_map,
            gid_map,
            fs_cfg,
            p9_cfg,
        } = shared_dir;

        let dev = match kind {
            SharedDirKind::FS => {
                // Only fs devices consume a tube from `fs_device_tubes`.
                let device_tube = fs_device_tubes.remove(0);
                create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
            }
            SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
        };
        devs.push(dev);
    }

    if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
        devs.push(create_vhost_user_mac80211_hwsim_device(
            cfg,
            vhost_user_mac80211_hwsim,
        )?);
    }

    #[cfg(feature = "audio")]
    if let Some(path) = &cfg.sound {
        devs.push(create_sound_device(path, cfg)?);
    }

    Ok(devs)
}
1630
Xiong Zhang10f15052021-04-08 17:23:33 +08001631fn create_vfio_device(
1632 cfg: &Config,
1633 vm: &impl Vm,
1634 resources: &mut SystemAllocator,
1635 control_tubes: &mut Vec<TaggedControlTube>,
1636 vfio_path: &Path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001637 bus_num: Option<u8>,
Zide Chendfc4b882021-03-10 16:35:37 -08001638 endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1639 iommu_enabled: bool,
Xiong Zhang10f15052021-04-08 17:23:33 +08001640) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Zide Chendfc4b882021-03-10 16:35:37 -08001641 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001642 .context("failed to get vfio container")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001643
1644 // create MSI, MSI-X, and Mem request sockets for each vfio device
Daniel Verkamp6b298582021-08-16 15:37:11 -07001645 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1646 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001647 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1648
Daniel Verkamp6b298582021-08-16 15:37:11 -07001649 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1650 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001651 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1652
Daniel Verkamp6b298582021-08-16 15:37:11 -07001653 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1654 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001655 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1656
Keiichi Watanabe7b805542021-09-03 02:13:51 +09001657 let vfio_device =
1658 VfioDevice::new_passthrough(&vfio_path, vm, vfio_container.clone(), iommu_enabled)
1659 .context("failed to create vfio device")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001660 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1661 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001662 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001663 vfio_device_tube_msi,
1664 vfio_device_tube_msix,
1665 vfio_device_tube_mem,
1666 ));
1667 // early reservation for pass-through PCI devices.
Zide Chendfc4b882021-03-10 16:35:37 -08001668 let endpoint_addr = vfio_pci_device.allocate_address(resources);
1669 if endpoint_addr.is_err() {
Xiong Zhang10f15052021-04-08 17:23:33 +08001670 warn!(
1671 "address reservation failed for vfio {}",
1672 vfio_pci_device.debug_label()
1673 );
1674 }
1675
Zide Chendfc4b882021-03-10 16:35:37 -08001676 if iommu_enabled {
1677 endpoints.insert(endpoint_addr.unwrap().to_u32(), vfio_container);
1678 }
1679
Xiong Zhang10f15052021-04-08 17:23:33 +08001680 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1681}
1682
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001683fn create_vfio_platform_device(
1684 cfg: &Config,
1685 vm: &impl Vm,
1686 _resources: &mut SystemAllocator,
1687 control_tubes: &mut Vec<TaggedControlTube>,
1688 vfio_path: &Path,
1689 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1690 iommu_enabled: bool,
1691) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
1692 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001693 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001694
Daniel Verkamp6b298582021-08-16 15:37:11 -07001695 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1696 Tube::pair().context("failed to create tube")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001697 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1698
Keiichi Watanabe7b805542021-09-03 02:13:51 +09001699 let vfio_device = VfioDevice::new_passthrough(&vfio_path, vm, vfio_container, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001700 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001701 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1702
1703 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1704}
1705
David Tolnay2b089fc2019-03-04 15:33:22 -08001706fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001707 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001708 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001709 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001710 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001711 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001712 control_tubes: &mut Vec<TaggedControlTube>,
1713 wayland_device_tube: Tube,
1714 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001715 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001716 balloon_device_tube: Tube,
1717 disk_device_tubes: &mut Vec<Tube>,
1718 pmem_device_tubes: &mut Vec<Tube>,
1719 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001720 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001721 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001722) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001723 let stubs = create_virtio_devices(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001724 cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001725 vm,
1726 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001727 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001728 wayland_device_tube,
1729 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001730 vhost_user_gpu_tubes,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001731 balloon_device_tube,
1732 disk_device_tubes,
1733 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001734 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001735 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001736 )?;
1737
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001738 let mut devices = Vec::new();
David Tolnay2b089fc2019-03-04 15:33:22 -08001739
1740 for stub in stubs {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001741 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001742 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001743 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001744 .context("failed to create virtio pci dev")?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001745 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
1746 devices.push((dev, stub.jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001747 }
1748
Andrew Scull1590e6f2020-03-18 18:00:47 +00001749 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001750 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001751 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
Daniel Verkamp6b298582021-08-16 15:37:11 -07001752 .context("failed to create ac97 device")?;
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001753 let jail = simple_jail(cfg, dev.minijail_policy())?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001754 devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001755 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001756
Daniel Verkampf1439d42021-05-21 13:55:10 -07001757 #[cfg(feature = "usb")]
1758 {
1759 // Create xhci controller.
1760 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001761 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
Daniel Verkampf1439d42021-05-21 13:55:10 -07001762 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001763
Zide Chen5deee482021-04-19 11:06:01 -07001764 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001765 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1766 BTreeMap::new();
1767
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001768 for vfio_dev in cfg
1769 .vfio
1770 .iter()
1771 .filter(|dev| dev.get_type() == VfioType::Pci)
1772 {
1773 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001774 let (vfio_pci_device, jail) = create_vfio_device(
1775 cfg,
1776 vm,
1777 resources,
1778 control_tubes,
1779 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001780 None,
Zide Chendfc4b882021-03-10 16:35:37 -08001781 &mut iommu_attached_endpoints,
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001782 vfio_dev.iommu_enabled(),
Zide Chen5deee482021-04-19 11:06:01 -07001783 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001784
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001785 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07001786 }
Zide Chendfc4b882021-03-10 16:35:37 -08001787
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001788 for vfio_dev in cfg
1789 .vfio
1790 .iter()
1791 .filter(|dev| dev.get_type() == VfioType::Platform)
1792 {
1793 let vfio_path = &vfio_dev.vfio_path;
1794 let (vfio_plat_dev, jail) = create_vfio_platform_device(
1795 cfg,
1796 vm,
1797 resources,
1798 control_tubes,
1799 vfio_path.as_path(),
1800 &mut iommu_attached_endpoints,
1801 false, // Virtio IOMMU is not supported yet
1802 )?;
1803
1804 devices.push((Box::new(vfio_plat_dev), jail));
1805 }
1806
Zide Chendfc4b882021-03-10 16:35:37 -08001807 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08001808 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001809
Daniel Verkamp6b298582021-08-16 15:37:11 -07001810 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001811 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07001812 let mut dev =
1813 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001814 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07001815 // early reservation for viommu.
1816 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001817 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07001818 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001819 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08001820 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001821 }
1822
Mattias Nisslerde2c6402021-10-21 12:05:29 +00001823 for params in &cfg.stub_pci_devices {
1824 // Stub devices don't need jailing since they don't do anything.
1825 devices.push((Box::new(StubPciDevice::new(params)), None));
1826 }
1827
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001828 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08001829}
1830
/// A user id / group id pair, as returned by `add_current_user_to_jail`.
// Dead-code warnings are allowed when the "tpm" feature is off; presumably the fields are
// only read by tpm-gated code -- confirm.
#[derive(Copy, Clone)]
#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
struct Ids {
    /// User id.
    uid: uid_t,
    /// Group id.
    gid: gid_t,
}
1837
David Tolnay48c48292019-03-01 16:54:25 -08001838// Set the uid/gid for the jailed process and give a basic id map. This is
1839// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10001840fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
1841 let crosvm_uid = geteuid();
1842 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08001843
David Tolnay48c48292019-03-01 16:54:25 -08001844 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001845 .context("error setting UID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08001846 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001847 .context("error setting GID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08001848
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09001849 if crosvm_uid != 0 {
1850 jail.change_uid(crosvm_uid);
1851 }
1852 if crosvm_gid != 0 {
1853 jail.change_gid(crosvm_gid);
1854 }
Fergus Dall51200512021-08-19 12:54:26 +10001855
David Tolnay41a6f842019-03-01 16:18:44 -08001856 Ok(Ids {
1857 uid: crosvm_uid,
1858 gid: crosvm_gid,
1859 })
David Tolnay48c48292019-03-01 16:54:25 -08001860}
1861
/// Conversion of a value into a `UnixStream`.
///
/// Implemented in this file for paths (`&Path`, `&PathBuf`) and for `UnixStream` itself.
trait IntoUnixStream {
    /// Consumes `self` and returns the resulting stream, or an error if one cannot be obtained.
    fn into_unix_stream(self) -> Result<UnixStream>;
}
1865
1866impl<'a> IntoUnixStream for &'a Path {
1867 fn into_unix_stream(self) -> Result<UnixStream> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001868 if let Some(fd) = safe_descriptor_from_path(self).context("failed to open event device")? {
Andrew Walbranbc55e302021-07-13 17:35:10 +01001869 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08001870 } else {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001871 UnixStream::connect(self).context("failed to open event device")
Zach Reizner65b98f12019-11-22 17:34:58 -08001872 }
1873 }
1874}
1875impl<'a> IntoUnixStream for &'a PathBuf {
1876 fn into_unix_stream(self) -> Result<UnixStream> {
1877 self.as_path().into_unix_stream()
1878 }
1879}
1880
impl IntoUnixStream for UnixStream {
    /// An existing stream is returned unchanged; this never fails.
    fn into_unix_stream(self) -> Result<UnixStream> {
        Ok(self)
    }
}
1886
Steven Richmanf32d0b42020-06-20 21:45:32 -07001887fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1888 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001889 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001890 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001891 // Our signal handler does nothing and is trivially async signal safe.
1892 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001893 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07001894 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001895 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07001896 } else {
1897 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001898 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001899 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001900 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001901 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001902 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07001903 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001904 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001905 Ok(())
1906}
1907
Steven Richmanf32d0b42020-06-20 21:45:32 -07001908// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001909fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001910 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001911 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001912 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001913 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07001914 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001915 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001916 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001917 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001918 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001919 has_bios: bool,
1920 use_hypervisor_signals: bool,
Yusuke Sato31e136a2021-08-18 11:51:38 -07001921 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001922 host_cpu_topology: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001923) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001924where
Zach Reizner2c770e62020-09-30 16:49:59 -07001925 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001926{
Zach Reizner304e7312020-09-29 16:00:24 -07001927 let mut vcpu = match vcpu {
1928 Some(v) => v,
1929 None => {
1930 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1931 // the vcpu thread.
1932 match vm
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001933 .create_vcpu(kvm_vcpu_id)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001934 .context("failed to create vcpu")?
Zach Reizner304e7312020-09-29 16:00:24 -07001935 .downcast::<V>()
1936 {
1937 Ok(v) => *v,
1938 Err(_) => panic!("VM created wrong type of VCPU"),
1939 }
1940 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001941 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001942
Steven Richmanf32d0b42020-06-20 21:45:32 -07001943 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001944 .add_vcpu(cpu_id, &vcpu)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001945 .context("failed to add vcpu to irq chip")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001946
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001947 if !vcpu_affinity.is_empty() {
1948 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1949 error!("Failed to set CPU affinity: {}", e);
1950 }
1951 }
1952
Steven Richmanf32d0b42020-06-20 21:45:32 -07001953 Arch::configure_vcpu(
1954 vm.get_memory(),
1955 vm.get_hypervisor(),
1956 irq_chip,
1957 &mut vcpu,
1958 cpu_id,
1959 vcpu_count,
1960 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001961 no_smt,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001962 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001963 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001964 .context("failed to configure vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001965
Yusuke Sato31e136a2021-08-18 11:51:38 -07001966 if !enable_per_vm_core_scheduling {
1967 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
1968 if let Err(e) = enable_core_scheduling() {
1969 error!("Failed to enable core scheduling: {}", e);
1970 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001971 }
1972
Kansho Nishidaab205af2020-08-13 18:17:50 +09001973 if run_rt {
1974 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1975 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1976 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1977 {
1978 warn!("Failed to set vcpu to real time: {}", e);
1979 }
1980 }
1981
Steven Richmanf32d0b42020-06-20 21:45:32 -07001982 if use_hypervisor_signals {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001983 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001984 v.retain(|&x| x != SIGRTMIN() + 0);
Daniel Verkamp6b298582021-08-16 15:37:11 -07001985 vcpu.set_signal_mask(&v)
1986 .context("failed to set the signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001987 }
1988
Zach Reizner2c770e62020-09-30 16:49:59 -07001989 let vcpu_run_handle = vcpu
1990 .take_run_handle(Some(SIGRTMIN() + 0))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001991 .context("failed to set thread id for vcpu")?;
Zach Reizner2c770e62020-09-30 16:49:59 -07001992
1993 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001994}
1995
/// Executes one GDB request `d` against `vcpu` and reports the outcome on `reply_tube`.
///
/// Each request is forwarded to the matching `Arch::debug_*` function. On success a
/// `VcpuDebugStatusMessage` tagged with `cpu_id` is sent: register values for `ReadRegs`, the
/// bytes read for `ReadMem`, and `CommandComplete` for every other request.
///
/// A failed `ReadMem` is not an error: an empty memory region is sent instead. Any other failing
/// request returns an error without sending a reply, as does a failure to send the reply.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // Work out the status to report; the reply itself is sent once, below.
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu)
                .context("failed to handle a gdb ReadRegs command")?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len)
                .unwrap_or_else(|_| Vec::new()),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .context("failed to send a debug status to GDB thread")
}
2074
Zach Reizner2c770e62020-09-30 16:49:59 -07002075fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002076 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002077 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002078 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002079 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07002080 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002081 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002082 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002083 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002084 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002085 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002086 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002087 has_bios: bool,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002088 mut io_bus: devices::Bus,
2089 mut mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07002090 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002091 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002092 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002093 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002094 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002095 mpsc::Sender<VcpuDebugStatusMessage>,
2096 >,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002097 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002098 host_cpu_topology: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002099) -> Result<JoinHandle<()>>
2100where
Zach Reizner2c770e62020-09-30 16:49:59 -07002101 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002102{
Zach Reizner8fb52112017-12-13 16:04:39 -08002103 thread::Builder::new()
2104 .name(format!("crosvm_vcpu{}", cpu_id))
2105 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002106 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2107 // implementation accomplishes that.
2108 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2109
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002110 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2111 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002112 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002113 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002114 kvm_vcpu_id,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002115 vcpu,
2116 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002117 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002118 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002119 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002120 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002121 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002122 has_bios,
2123 use_hypervisor_signals,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002124 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002125 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002126 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002127
Zach Reizner8fb52112017-12-13 16:04:39 -08002128 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002129
Zach Reizner2c770e62020-09-30 16:49:59 -07002130 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002131 Ok(v) => v,
2132 Err(e) => {
Maciek Swiechc3011222021-11-24 21:01:04 +00002133 error!("failed to start vcpu {}: {:#}", cpu_id, e);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002134 return;
2135 }
2136 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002137
Dylan Reidb0492662019-05-17 14:50:13 -07002138 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002139 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002140 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002141 // Wait until a GDB client attaches
2142 run_mode = VmRunMode::Breakpoint;
2143 }
2144
Dylan Reidb0492662019-05-17 14:50:13 -07002145 let mut interrupted_by_signal = false;
2146
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002147 mmio_bus.set_access_id(cpu_id);
2148 io_bus.set_access_id(cpu_id);
2149
Dylan Reidb0492662019-05-17 14:50:13 -07002150 'vcpu_loop: loop {
2151 // Start by checking for messages to process and the run state of the CPU.
2152 // An extra check here for Running so there isn't a need to call recv unless a
2153 // message is likely to be ready because a signal was sent.
2154 if interrupted_by_signal || run_mode != VmRunMode::Running {
2155 'state_loop: loop {
2156 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002157 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002158 Ok(m) => m,
2159 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2160 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002161 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002162 break 'state_loop;
2163 }
2164 Err(mpsc::TryRecvError::Empty) => {
2165 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002166 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002167 Ok(m) => m,
2168 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002169 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002170 break 'vcpu_loop;
2171 }
2172 }
2173 }
2174 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002175 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002176 break 'vcpu_loop;
2177 }
2178 };
2179
2180 // Collect all pending messages.
2181 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002182 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002183
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002184 for msg in messages {
2185 match msg {
2186 VcpuControl::RunState(new_mode) => {
2187 run_mode = new_mode;
2188 match run_mode {
2189 VmRunMode::Running => break 'state_loop,
2190 VmRunMode::Suspending => {
2191 // On KVM implementations that use a paravirtualized
2192 // clock (e.g. x86), a flag must be set to indicate to
2193 // the guest kernel that a vCPU was suspended. The guest
2194 // kernel will use this flag to prevent the soft lockup
2195 // detection from triggering when this vCPU resumes,
2196 // which could happen days later in realtime.
2197 if requires_pvclock_ctrl {
2198 if let Err(e) = vcpu.pvclock_ctrl() {
2199 error!(
2200 "failed to tell hypervisor vcpu {} is suspending: {}",
2201 cpu_id, e
2202 );
2203 }
2204 }
2205 }
2206 VmRunMode::Breakpoint => {}
2207 VmRunMode::Exiting => break 'vcpu_loop,
2208 }
2209 }
2210 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2211 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002212 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002213 Some(ref ch) => {
2214 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07002215 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002216 ) {
2217 error!("Failed to handle gdb message: {}", e);
2218 }
2219 },
2220 None => {
2221 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002222 }
2223 }
2224 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002225 VcpuControl::MakeRT => {
2226 if run_rt && delay_rt {
2227 info!("Making vcpu {} RT\n", cpu_id);
2228 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2229 if let Err(e) = set_rt_prio_limit(
2230 u64::from(DEFAULT_VCPU_RT_LEVEL))
2231 .and_then(|_|
2232 set_rt_round_robin(
2233 i32::from(DEFAULT_VCPU_RT_LEVEL)
2234 ))
2235 {
2236 warn!("Failed to set vcpu to real time: {}", e);
2237 }
2238 }
2239 }
Dylan Reidb0492662019-05-17 14:50:13 -07002240 }
2241 }
2242 }
2243 }
2244
2245 interrupted_by_signal = false;
2246
Steven Richman11dc6712020-09-02 15:39:14 -07002247 // Vcpus may have run a HLT instruction, which puts them into a state other than
2248 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2249 // until either the irqchip receives an interrupt for this vcpu, or until the main
2250 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2251 // implementations HLT instructions do not make it to crosvm, and thus this is a
2252 // no-op that always returns VcpuRunState::Runnable.
2253 match irq_chip.wait_until_runnable(&vcpu) {
2254 Ok(VcpuRunState::Runnable) => {}
2255 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2256 Err(e) => error!(
2257 "error waiting for vcpu {} to become runnable: {}",
2258 cpu_id, e
2259 ),
2260 }
2261
2262 if !interrupted_by_signal {
2263 match vcpu.run(&vcpu_run_handle) {
2264 Ok(VcpuExit::IoIn { port, mut size }) => {
2265 let mut data = [0; 8];
2266 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002267 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002268 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002269 }
Steven Richman11dc6712020-09-02 15:39:14 -07002270 io_bus.read(port as u64, &mut data[..size]);
2271 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002272 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002273 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002274 }
Steven Richman11dc6712020-09-02 15:39:14 -07002275 Ok(VcpuExit::IoOut {
2276 port,
2277 mut size,
2278 data,
2279 }) => {
2280 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002281 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002282 size = data.len();
2283 }
2284 io_bus.write(port as u64, &data[..size]);
2285 }
2286 Ok(VcpuExit::MmioRead { address, size }) => {
2287 let mut data = [0; 8];
2288 mmio_bus.read(address, &mut data[..size]);
2289 // Setting data for mmio can not fail.
2290 let _ = vcpu.set_data(&data[..size]);
2291 }
2292 Ok(VcpuExit::MmioWrite {
2293 address,
2294 size,
2295 data,
2296 }) => {
2297 mmio_bus.write(address, &data[..size]);
2298 }
2299 Ok(VcpuExit::IoapicEoi { vector }) => {
2300 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2301 error!(
2302 "failed to broadcast eoi {} on vcpu {}: {}",
2303 vector, cpu_id, e
2304 );
2305 }
2306 }
2307 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002308 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002309 Ok(VcpuExit::Shutdown) => break,
2310 Ok(VcpuExit::FailEntry {
2311 hardware_entry_failure_reason,
2312 }) => {
2313 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002314 break;
2315 }
Steven Richman11dc6712020-09-02 15:39:14 -07002316 Ok(VcpuExit::SystemEvent(_, _)) => break,
2317 Ok(VcpuExit::Debug { .. }) => {
2318 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2319 {
2320 let msg = VcpuDebugStatusMessage {
2321 cpu: cpu_id as usize,
2322 msg: VcpuDebugStatus::HitBreakPoint,
2323 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002324 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002325 if let Err(e) = ch.send(msg) {
2326 error!("failed to notify breakpoint to GDB thread: {}", e);
2327 break;
2328 }
2329 }
2330 run_mode = VmRunMode::Breakpoint;
2331 }
2332 }
2333 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2334 Err(e) => match e.errno() {
2335 libc::EINTR => interrupted_by_signal = true,
2336 libc::EAGAIN => {}
2337 _ => {
2338 error!("vcpu hit unknown error: {}", e);
2339 break;
2340 }
2341 },
2342 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002343 }
2344
2345 if interrupted_by_signal {
2346 if use_hypervisor_signals {
2347 // Try to clear the signal that we use to kick VCPU if it is pending before
2348 // attempting to handle pause requests.
2349 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2350 error!("failed to clear pending signal: {}", e);
2351 break;
2352 }
2353 } else {
2354 vcpu.set_immediate_exit(false);
2355 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002356 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002357
Steven Richman11dc6712020-09-02 15:39:14 -07002358 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2359 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2360 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002361 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002362 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002363 .context("failed to spawn VCPU thread")
Zach Reizner39aa26b2017-12-12 18:03:23 -08002364}
2365
Zach Reiznera90649a2021-03-31 12:56:08 -07002366fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002367 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002368 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002369 open_file(
2370 initrd_path,
2371 true, /*read_only*/
2372 false, /*O_DIRECT*/
2373 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002374 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002375 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002376 } else {
2377 None
2378 };
2379
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002380 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002381 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002382 open_file(
2383 kernel_path,
2384 true, /*read_only*/
2385 false, /*O_DIRECT*/
2386 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002387 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002388 ),
2389 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002390 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002391 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002392 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002393 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2394 };
2395
Will Deaconc48e7832021-07-30 19:03:06 +01002396 let swiotlb = if let Some(size) = cfg.swiotlb {
2397 Some(
2398 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002399 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002400 )
2401 } else {
2402 match cfg.protected_vm {
2403 ProtectionType::Protected => Some(64 * 1024 * 1024),
2404 ProtectionType::Unprotected => None,
2405 }
2406 };
2407
Zach Reiznera90649a2021-03-31 12:56:08 -07002408 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002409 memory_size: cfg
2410 .memory
2411 .unwrap_or(256)
2412 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002413 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002414 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002415 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002416 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002417 cpu_clusters: cfg.cpu_clusters.clone(),
2418 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002419 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002420 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002421 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002422 android_fstab: cfg
2423 .android_fstab
2424 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002425 .map(|x| {
2426 File::open(x)
2427 .with_context(|| format!("failed to open android fstab file {}", x.display()))
2428 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002429 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002430 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002431 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002432 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002433 acpi_sdts: cfg
2434 .acpi_tables
2435 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002436 .map(|path| {
2437 SDT::from_file(path)
2438 .with_context(|| format!("failed to open ACPI file {}", path.display()))
2439 })
Tomasz Jeznach42644642020-05-20 23:27:59 -07002440 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002441 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002442 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002443 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002444 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002445 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002446 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002447 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002448 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -07002449 })
2450}
2451
/// Outcome reported by `run_config` when it returns successfully.
///
/// The derives let callers log, copy and compare the value without matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitState {
    // NOTE(review): presumably signals that the VM should be restarted -- confirm with callers.
    Reset,
    // NOTE(review): presumably signals that the VM finished and should not be restarted --
    // confirm with callers.
    Stop,
}
2456
2457pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -07002458 let components = setup_vm_components(&cfg)?;
2459
2460 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002461 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
2462 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002463 let mut mem_policy = MemoryPolicy::empty();
2464 if components.hugepages {
2465 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2466 }
Quentin Perret26203802021-12-02 09:48:43 +00002467 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002468 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
2469 let vm = KvmVm::new(&kvm, guest_mem).context("failed to create vm")?;
2470 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002471
2472 enum KvmIrqChip {
2473 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2474 Split(KvmSplitIrqChip),
2475 Kernel(KvmKernelIrqChip),
2476 }
2477
2478 impl KvmIrqChip {
2479 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2480 match self {
2481 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2482 KvmIrqChip::Split(i) => i,
2483 KvmIrqChip::Kernel(i) => i,
2484 }
2485 }
2486 }
2487
2488 let ioapic_host_tube;
2489 let mut irq_chip = if cfg.split_irqchip {
2490 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2491 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2492 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2493 {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002494 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002495 ioapic_host_tube = Some(host_tube);
2496 KvmIrqChip::Split(
2497 KvmSplitIrqChip::new(
2498 vm_clone,
2499 components.vcpu_count,
2500 ioapic_device_tube,
2501 Some(120),
2502 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002503 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002504 )
2505 }
2506 } else {
2507 ioapic_host_tube = None;
2508 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002509 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
2510 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002511 )
2512 };
2513
2514 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2515}
2516
2517fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002518 cfg: Config,
2519 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002520 mut vm: V,
2521 irq_chip: &mut dyn IrqChipArch,
2522 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002523) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07002524where
2525 Vcpu: VcpuArch + 'static,
2526 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002527{
2528 if cfg.sandbox {
2529 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2530 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2531 // access to those files will not be possible.
2532 info!("crosvm entering multiprocess mode");
2533 }
2534
Daniel Verkampf1439d42021-05-21 13:55:10 -07002535 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002536 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002537 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002538
Zach Reiznera90649a2021-03-31 12:56:08 -07002539 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2540 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2541 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002542 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002543
Zach Reiznera60744b2019-02-13 17:33:32 -08002544 let control_server_socket = match &cfg.socket_path {
2545 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002546 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002547 )),
2548 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002549 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002550
Zach Reiznera90649a2021-03-31 12:56:08 -07002551 let mut control_tubes = Vec::new();
2552
2553 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2554 if let Some(port) = cfg.gdb {
2555 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002556 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07002557 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2558 components.gdb = Some((port, gdb_control_tube));
2559 }
2560
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002561 for wl_cfg in &cfg.vhost_user_wl {
2562 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2563 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002564 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002565 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2566 }
2567
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002568 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2569 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002570 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002571 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -07002572 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002573 device_tube,
2574 ));
2575 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2576 }
2577
Daniel Verkamp6b298582021-08-16 15:37:11 -07002578 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002579 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002580 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002581 let (balloon_host_tube, balloon_device_tube) = Tube::pair().context("failed to create tube")?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002582 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2583 balloon_host_tube
2584 .set_recv_timeout(Some(Duration::from_millis(100)))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002585 .context("failed to create tube")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002586
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002587 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002588 let mut disk_device_tubes = Vec::new();
2589 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002590 let disk_count = cfg.disks.len();
2591 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002592 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002593 disk_host_tubes.push(disk_host_tub);
2594 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002595 }
2596
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002597 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002598 let pmem_count = cfg.pmem_devices.len();
2599 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002600 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002601 pmem_device_tubes.push(pmem_device_tube);
2602 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002603 }
2604
Daniel Verkamp6b298582021-08-16 15:37:11 -07002605 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002606 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002607
Zach Reiznerdc748482021-04-14 13:59:30 -07002608 if let Some(ioapic_host_tube) = ioapic_host_tube {
2609 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2610 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002611
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002612 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002613 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002614 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002615 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002616 Some(mut jail) => {
2617 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2618 #[cfg(feature = "power-monitor-powerd")]
2619 {
Fergus Dall51200512021-08-19 12:54:26 +10002620 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09002621
2622 // Create a tmpfs in the device's root directory so that we can bind mount files.
2623 jail.mount_with_data(
2624 Path::new("none"),
2625 Path::new("/"),
2626 "tmpfs",
2627 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2628 "size=67108864",
2629 )?;
2630
2631 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2632 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2633 }
2634 Some(jail)
2635 }
2636 None => None,
2637 };
2638 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002639 } else {
2640 (&cfg.battery_type, None)
2641 };
2642
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002643 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2644
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002645 let fs_count = cfg
2646 .shared_dirs
2647 .iter()
2648 .filter(|sd| sd.kind == SharedDirKind::FS)
2649 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002650 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002651 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002652 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002653 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2654 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002655 }
2656
Daniel Verkamp6b298582021-08-16 15:37:11 -07002657 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002658 let reset_evt = Event::new().context("failed to create event")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002659 let mut sys_allocator = Arch::create_system_allocator(vm.get_memory());
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002660
2661 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
2662 let ramoops_region = match &components.pstore {
2663 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07002664 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002665 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002666 ),
2667 None => None,
2668 };
2669
Zide Chen71435c12021-03-03 15:02:02 -08002670 let phys_max_addr = Arch::get_phys_max_addr();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002671 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07002672 &cfg,
2673 &mut vm,
2674 &mut sys_allocator,
2675 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08002676 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07002677 &mut control_tubes,
2678 wayland_device_tube,
2679 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002680 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07002681 balloon_device_tube,
2682 &mut disk_device_tubes,
2683 &mut pmem_device_tubes,
2684 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002685 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002686 usb_provider,
2687 Arc::clone(&map_request),
2688 )?;
2689
Peter Fangc2bba082021-04-19 18:40:24 -07002690 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002691 for device in devices
2692 .iter_mut()
2693 .filter_map(|(dev, _)| dev.as_pci_device_mut())
2694 {
Peter Fangc2bba082021-04-19 18:40:24 -07002695 let sdts = device
2696 .generate_acpi(components.acpi_sdts)
2697 .or_else(|| {
2698 error!("ACPI table generation error");
2699 None
2700 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002701 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07002702 components.acpi_sdts = sdts;
2703 }
2704
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002705 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
2706 let mut kvm_vcpu_ids = Vec::new();
2707
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002708 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002709 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002710 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002711 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002712 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07002713 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002714 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002715 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002716 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002717 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002718 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002719 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07002720 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002721 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06002722 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002723 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08002724
Daniel Verkamp1286b482021-11-30 15:14:16 -08002725 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2726 {
2727 // Create Pcie Root Port
2728 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
2729 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2730 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
2731 let sec_bus = (1..255)
2732 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
2733 .context("failed to find empty bus for Pci hotplug")?;
2734 let pci_bridge = Box::new(PciBridge::new(
2735 pcie_root_port.clone(),
2736 msi_device_tube,
2737 0,
2738 sec_bus,
2739 ));
2740 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
2741 .context("Failed to configure pci bridge device")?;
2742 linux.hotplug_bus.push(pcie_root_port);
2743 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002744
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002745 #[cfg(feature = "direct")]
2746 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002747 let direct_io = Arc::new(
2748 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
2749 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002750 for range in pmio.ranges.iter() {
2751 linux
2752 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002753 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002754 .unwrap();
2755 }
2756 };
2757
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002758 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002759 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08002760 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002761 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08002762 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07002763 );
Xiong Zhang46471a02021-11-12 00:34:42 +08002764
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002765 for range in mmio.ranges.iter() {
2766 linux
2767 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002768 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002769 .unwrap();
2770 }
2771 };
2772
2773 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002774 let mut irqs = Vec::new();
2775
2776 #[cfg(feature = "direct")]
2777 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002778 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002779 warn!("irq {} already reserved.", irq);
2780 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002781 let trigger = Event::new().context("failed to create event")?;
2782 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002783 linux
2784 .irq_chip
2785 .register_irq_event(*irq, &trigger, Some(&resample))
2786 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07002787 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
2788 .context("failed to enable interrupt forwarding")?;
2789 direct_irq
2790 .irq_enable(*irq)
2791 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002792 irqs.push(direct_irq);
2793 }
2794
2795 #[cfg(feature = "direct")]
2796 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002797 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002798 warn!("irq {} already reserved.", irq);
2799 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002800 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002801 linux
2802 .irq_chip
2803 .register_irq_event(*irq, &trigger, None)
2804 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07002805 let direct_irq = devices::DirectIrq::new(trigger, None)
2806 .context("failed to enable interrupt forwarding")?;
2807 direct_irq
2808 .irq_enable(*irq)
2809 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002810 irqs.push(direct_irq);
2811 }
2812
Daniel Verkamp6b298582021-08-16 15:37:11 -07002813 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002814 run_control(
2815 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07002816 sys_allocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002817 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002818 control_tubes,
2819 balloon_host_tube,
2820 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002821 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002822 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002823 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002824 reset_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002825 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002826 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002827 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08002828 gralloc,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002829 cfg.per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002830 cfg.host_cpu_topology,
2831 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002832 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002833}
2834
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002835fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
2836 linux: &RunnableLinuxVm<V, Vcpu>,
2837 host_addr: PciAddress,
2838) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
2839 for hp_bus in linux.hotplug_bus.iter() {
2840 if let Some(number) = hp_bus.lock().is_match(host_addr) {
2841 return Ok((hp_bus.clone(), number));
2842 }
2843 }
2844 Err(anyhow!("Failed to find a suitable hotplug bus"))
2845}
2846
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002847#[allow(dead_code)]
2848fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2849 linux: &mut RunnableLinuxVm<V, Vcpu>,
2850 sys_allocator: &mut SystemAllocator,
2851 cfg: &Config,
2852 control_tubes: &mut Vec<TaggedControlTube>,
2853 vfio_path: &Path,
2854) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002855 let host_os_str = vfio_path
2856 .file_name()
2857 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2858 let host_str = host_os_str
2859 .to_str()
2860 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2861 let host_addr = PciAddress::from_string(host_str);
2862
2863 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
2864
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002865 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
2866 let (vfio_pci_device, jail) = create_vfio_device(
2867 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002868 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002869 sys_allocator,
2870 control_tubes,
2871 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002872 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002873 &mut endpoints,
2874 false,
2875 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002876
2877 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002878 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002879
Daniel Verkamp6b298582021-08-16 15:37:11 -07002880 let host_os_str = vfio_path
2881 .file_name()
2882 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2883 let host_str = host_os_str
2884 .to_str()
2885 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002886 let host_addr = PciAddress::from_string(host_str);
2887 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002888 let mut hp_bus = hp_bus.lock();
2889 hp_bus.add_hotplug_device(host_key, pci_address);
2890 hp_bus.hot_plug(pci_address);
2891 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002892}
2893
2894#[allow(dead_code)]
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002895fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2896 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08002897 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002898 vfio_path: &Path,
2899) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002900 let host_os_str = vfio_path
2901 .file_name()
2902 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
2903 let host_str = host_os_str
2904 .to_str()
2905 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002906 let host_addr = PciAddress::from_string(host_str);
2907 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002908 for hp_bus in linux.hotplug_bus.iter() {
2909 let mut hp_bus_lock = hp_bus.lock();
2910 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
2911 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08002912 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002913 return Ok(());
2914 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002915 }
2916
Daniel Verkamp6b298582021-08-16 15:37:11 -07002917 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002918}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002919
Daniel Verkamp29409802021-02-24 14:46:19 -08002920/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
2921/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07002922/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2923/// loop.
2924fn kick_all_vcpus(
2925 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07002926 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08002927 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07002928) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002929 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08002930 if let Err(e) = tube.send(message.clone()) {
2931 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07002932 }
2933 let _ = handle.kill(SIGRTMIN() + 0);
2934 }
2935 irq_chip.kick_halted_vcpus();
2936}
2937
Zach Reiznerdc748482021-04-14 13:59:30 -07002938fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
2939 mut linux: RunnableLinuxVm<V, Vcpu>,
2940 mut sys_allocator: SystemAllocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002941 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002942 mut control_tubes: Vec<TaggedControlTube>,
2943 balloon_host_tube: Tube,
2944 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07002945 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002946 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002947 reset_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07002948 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002949 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002950 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002951 mut gralloc: RutabagaGralloc,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002952 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002953 host_cpu_topology: bool,
2954 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002955) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002956 #[derive(PollToken)]
2957 enum Token {
2958 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002959 Reset,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002960 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002961 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002962 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08002963 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002964 VmControl { index: usize },
2965 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002966
Zach Reizner19ad1f32019-12-12 18:58:50 -08002967 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002968 .set_raw_mode()
2969 .expect("failed to set terminal raw mode");
2970
Michael Hoylee392c462020-10-07 03:29:24 -07002971 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07002972 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002973 (&reset_evt, Token::Reset),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002974 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002975 (&sigchld_fd, Token::ChildSignal),
2976 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07002977 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002978
Zach Reiznera60744b2019-02-13 17:33:32 -08002979 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002980 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002981 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002982 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002983 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002984 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002985 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002986 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002987 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002988 }
2989
Steven Richmanf32d0b42020-06-20 21:45:32 -07002990 let events = linux
2991 .irq_chip
2992 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002993 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002994
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002995 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002996 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002997 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002998 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002999 }
3000
Lepton Wu20333e42019-03-14 10:48:03 -07003001 if sandbox {
3002 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003003 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07003004 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08003005
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003006 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3007 // Create a channel for GDB thread.
3008 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
3009 let (s, r) = mpsc::channel();
3010 (Some(s), Some(r))
3011 } else {
3012 (None, None)
3013 };
3014
Steven Richmanf32d0b42020-06-20 21:45:32 -07003015 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
3016 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07003017 let use_hypervisor_signals = !linux
3018 .vm
3019 .get_hypervisor()
3020 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07003021 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003022
Zach Reizner304e7312020-09-29 16:00:24 -07003023 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00003024 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003025 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
3026 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07003027 // Enable core scheduling before creating vCPUs so that the cookie will be
3028 // shared by all vCPU threads.
3029 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
3030 // itself for even better performance. Only vCPUs need the feature.
3031 if enable_per_vm_core_scheduling {
3032 if let Err(e) = enable_core_scheduling() {
3033 error!("Failed to enable core scheduling: {}", e);
3034 }
3035 }
Daniel Verkamp94c35272019-09-12 13:31:30 -07003036 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07003037 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07003038 let vcpu_affinity = match linux.vcpu_affinity.clone() {
3039 Some(VcpuAffinity::Global(v)) => v,
3040 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
3041 None => Default::default(),
3042 };
Zach Reizner55a9e502018-10-03 10:22:32 -07003043 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07003044 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003045 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07003046 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003047 linux.vm.try_clone().context("failed to clone vm")?,
3048 linux
3049 .irq_chip
3050 .try_box_clone()
3051 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003052 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09003053 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07003054 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09003055 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09003056 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07003057 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003058 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07003059 (*linux.io_bus).clone(),
3060 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07003061 exit_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003062 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003063 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003064 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003065 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3066 to_gdb_channel.clone(),
Yusuke Sato31e136a2021-08-18 11:51:38 -07003067 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003068 host_cpu_topology,
Zach Reizner55a9e502018-10-03 10:22:32 -07003069 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003070 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003071 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003072
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003073 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3074 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003075 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003076 let to_vcpu_channels = vcpu_handles
3077 .iter()
3078 .map(|(_handle, channel)| channel.clone())
3079 .collect();
3080 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003081 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003082 to_vcpu_channels,
3083 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3084 );
3085 thread::Builder::new()
3086 .name("gdb".to_owned())
3087 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07003088 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003089 };
3090
Dylan Reid059a1882018-07-23 17:58:09 -07003091 vcpu_thread_barrier.wait();
3092
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003093 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09003094 let mut balloon_stats_id: u64 = 0;
3095
Michael Hoylee392c462020-10-07 03:29:24 -07003096 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003097 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003098 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003099 Ok(v) => v,
3100 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003101 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003102 break;
3103 }
3104 }
3105 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003106
Steven Richmanf32d0b42020-06-20 21:45:32 -07003107 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3108 warn!("can't deliver delayed irqs: {}", e);
3109 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003110
Zach Reiznera60744b2019-02-13 17:33:32 -08003111 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003112 for event in events.iter().filter(|e| e.is_readable) {
3113 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003114 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003115 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003116 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003117 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003118 Token::Reset => {
3119 info!("vcpu requested reset");
3120 exit_state = ExitState::Reset;
3121 break 'wait;
3122 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003123 Token::Suspend => {
3124 info!("VM requested suspend");
3125 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003126 kick_all_vcpus(
3127 &vcpu_handles,
3128 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003129 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003130 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003131 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003132 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003133 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003134 while let Some(siginfo) =
3135 sigchld_fd.read().context("failed to create signalfd")?
3136 {
Zach Reizner3ba00982019-01-23 19:04:43 -08003137 let pid = siginfo.ssi_pid;
3138 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3139 Some(label) => format!("{} (pid {})", label, pid),
3140 None => format!("pid {}", pid),
3141 };
David Tolnayf5032762018-12-03 10:46:45 -08003142 error!(
3143 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003144 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003145 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003146 }
Michael Hoylee392c462020-10-07 03:29:24 -07003147 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003148 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003149 Token::IrqFd { index } => {
3150 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3151 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003152 }
3153 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003154 Token::VmControlServer => {
3155 if let Some(socket_server) = &control_server_socket {
3156 match socket_server.accept() {
3157 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003158 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003159 .add(
3160 &socket,
3161 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003162 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003163 },
3164 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003165 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003166 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003167 }
3168 Err(e) => error!("failed to accept socket: {}", e),
3169 }
3170 }
3171 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003172 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003173 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003174 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003175 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003176 Ok(request) => {
3177 let mut run_mode_opt = None;
3178 let response = request.execute(
3179 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003180 &balloon_host_tube,
Charles William Dick54045012021-07-27 19:11:53 +09003181 &mut balloon_stats_id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003182 disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003183 #[cfg(feature = "usb")]
3184 Some(&usb_control_tube),
3185 #[cfg(not(feature = "usb"))]
3186 None,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08003187 &mut linux.bat_control,
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09003188 &vcpu_handles,
Jakub Starond99cd0a2019-04-11 14:09:39 -07003189 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003190 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003191 error!("failed to send VmResponse: {}", e);
3192 }
3193 if let Some(run_mode) = run_mode_opt {
3194 info!("control socket changed run mode to {}", run_mode);
3195 match run_mode {
3196 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003197 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003198 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003199 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003200 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07003201 for dev in &linux.resume_notify_devices {
3202 dev.lock().resume_imminent();
3203 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003204 }
Steven Richman11dc6712020-09-02 15:39:14 -07003205 kick_all_vcpus(
3206 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003207 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003208 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003209 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003210 }
3211 }
3212 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003213 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003214 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003215 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003216 vm_control_indices_to_remove.push(index);
3217 } else {
3218 error!("failed to recv VmRequest: {}", e);
3219 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003220 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003221 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003222 TaggedControlTube::VmMemory(tube) => {
3223 match tube.recv::<VmMemoryRequest>() {
3224 Ok(request) => {
3225 let response = request.execute(
3226 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003227 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003228 Arc::clone(&map_request),
3229 &mut gralloc,
3230 );
3231 if let Err(e) = tube.send(&response) {
3232 error!("failed to send VmMemoryControlResponse: {}", e);
3233 }
3234 }
3235 Err(e) => {
3236 if let TubeError::Disconnected = e {
3237 vm_control_indices_to_remove.push(index);
3238 } else {
3239 error!("failed to recv VmMemoryControlRequest: {}", e);
3240 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003241 }
3242 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003243 }
3244 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003245 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003246 let response = {
3247 let irq_chip = &mut linux.irq_chip;
3248 request.execute(
3249 |setup| match setup {
3250 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003251 if let Some(event_index) = irq_chip
3252 .register_irq_event(irq, ev, None)?
3253 {
3254 match wait_ctx.add(
3255 ev,
3256 Token::IrqFd {
3257 index: event_index
3258 },
3259 ) {
3260 Err(e) => {
3261 warn!("failed to add IrqFd to poll context: {}", e);
3262 Err(e)
3263 },
3264 Ok(_) => {
3265 Ok(())
3266 }
3267 }
3268 } else {
3269 Ok(())
3270 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003271 }
3272 IrqSetup::Route(route) => irq_chip.route_irq(route),
3273 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003274 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003275 )
3276 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003277 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003278 error!("failed to send VmIrqResponse: {}", e);
3279 }
3280 }
3281 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003282 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003283 vm_control_indices_to_remove.push(index);
3284 } else {
3285 error!("failed to recv VmIrqRequest: {}", e);
3286 }
3287 }
3288 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003289 TaggedControlTube::VmMsync(tube) => {
3290 match tube.recv::<VmMsyncRequest>() {
3291 Ok(request) => {
3292 let response = request.execute(&mut linux.vm);
3293 if let Err(e) = tube.send(&response) {
3294 error!("failed to send VmMsyncResponse: {}", e);
3295 }
3296 }
3297 Err(e) => {
3298 if let TubeError::Disconnected = e {
3299 vm_control_indices_to_remove.push(index);
3300 } else {
3301 error!("failed to recv VmMsyncRequest: {}", e);
3302 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003303 }
3304 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003305 }
3306 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003307 Ok(request) => {
3308 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003309 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003310 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003311 error!("failed to send VmResponse: {}", e);
3312 }
3313 }
3314 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003315 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003316 vm_control_indices_to_remove.push(index);
3317 } else {
3318 error!("failed to recv VmResponse: {}", e);
3319 }
3320 }
3321 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003322 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003323 }
3324 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003325 }
3326 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003327
Vikram Auradkarede68c72021-07-01 14:33:54 -07003328 // It's possible more data is readable and buffered while the socket is hungup,
3329 // so don't delete the tube from the poll context until we're sure all the
3330 // data is read.
3331 // Below case covers a condition where we have received a hungup event and the tube is not
3332 // readable.
3333 // In case of readable tube, once all data is read, any attempt to read more data on hungup
3334 // tube should fail. On such failure, we get Disconnected error and index gets added to
3335 // vm_control_indices_to_remove by the time we reach here.
3336 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3337 if let Token::VmControl { index } = event.token {
3338 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003339 }
3340 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003341
3342 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003343 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003344 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003345 vm_control_indices_to_remove.dedup();
3346 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003347 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3348 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003349 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003350 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003351 // that has already been closed. Because the token associated with that spurious event
3352 // now belongs to a different socket, the control loop will start to interact with
3353 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3354 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003355 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003356 wait_ctx
3357 .delete(socket)
3358 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08003359 }
3360
3361 // This line implicitly drops the socket at `index` when it gets returned by
3362 // `swap_remove`. After this line, the socket at `index` is not the one from
3363 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003364 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003365 control_tubes.swap_remove(index);
3366 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003367 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003368 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003369 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003370 }
3371 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003372 }
3373
Zach Reiznerdc748482021-04-14 13:59:30 -07003374 kick_all_vcpus(
3375 &vcpu_handles,
3376 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003377 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003378 );
Steven Richman11dc6712020-09-02 15:39:14 -07003379 for (handle, _) in vcpu_handles {
3380 if let Err(e) = handle.join() {
3381 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003382 }
3383 }
3384
Daniel Verkamp94c35272019-09-12 13:31:30 -07003385 // Explicitly drop the VM structure here to allow the devices to clean up before the
3386 // control sockets are closed when this function exits.
3387 mem::drop(linux);
3388
Zach Reizner19ad1f32019-12-12 18:58:50 -08003389 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003390 .set_canon_mode()
3391 .expect("failed to restore canonical mode for terminal");
3392
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003393 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08003394}