blob: c5554827f47dadde44393bd91b69dba3dc82a8b3 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Hikaru Nishida584e52c2021-04-27 17:37:08 +09005use std::cmp::Reverse;
Zide Chendfc4b882021-03-10 16:35:37 -08006use std::collections::BTreeMap;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090011use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Zach Reiznera60744b2019-02-13 17:33:32 -080015use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080016use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070017use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Zach Reizner39aa26b2017-12-12 18:03:23 -080021use std::thread;
22use std::thread::JoinHandle;
23
Peter Fangad3b24e2021-06-21 00:43:29 -070024use libc::{self, c_int, gid_t, uid_t, EINVAL};
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
Tomasz Jeznach42644642020-05-20 23:27:59 -070026use acpi_tables::sdt::SDT;
27
Richard5afeafa2021-07-26 19:02:09 -070028use crate::error::{Error, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090029use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090031use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070032use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090033#[cfg(feature = "audio_cras")]
34use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090035use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070036 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
37 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090038};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070039use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090040#[cfg(feature = "gpu")]
41use devices::virtio::{
42 gpu::{DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
43 vhost::user::vmm::Gpu as VhostUserGpu,
44 EventDevice,
45};
paulhsiace17e6e2020-08-28 18:37:45 +080046#[cfg(feature = "audio")]
47use devices::Ac97Dev;
Will Deaconc48e7832021-07-30 19:03:06 +010048use devices::ProtectionType;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080049use devices::{
Tomasz Nowickiab86d522021-09-22 05:50:46 +000050 self, BusDeviceObj, HostHotPlugKey, IrqChip, IrqEventIndex, KvmKernelIrqChip, PciAddress,
Tomasz Nowicki344eb142021-09-22 05:51:58 +000051 PciDevice, VcpuRunState, VfioContainer, VfioDevice, VfioPciDevice, VfioPlatformDevice,
52 VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080053};
Daniel Verkampf1439d42021-05-21 13:55:10 -070054#[cfg(feature = "usb")]
55use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070056use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Xiong Zhangdea7dbb2021-07-26 14:49:03 +080057use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070058use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070059use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080060use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080061use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070062use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080063use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090064use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080065
Keiichi Watanabec5262e92020-10-21 15:57:33 +090066#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
67use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090068use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000069 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
70 VhostUserFsOption, VhostUserOption, VhostUserWlOption,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090071};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070072use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090073 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070074};
Sonny Raoed517d12018-02-13 22:09:43 -080075
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080076#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070077use {
78 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070079 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070080 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
81};
Zach Reizner55a9e502018-10-03 10:22:32 -070082#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070083use {
Steven Richman11dc6712020-09-02 15:39:14 -070084 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
85 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070086 x86_64::X8664arch as Arch,
87};
Zach Reizner39aa26b2017-12-12 18:03:23 -080088
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080089enum TaggedControlTube {
90 Fs(Tube),
91 Vm(Tube),
92 VmMemory(Tube),
93 VmIrq(Tube),
94 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -070095}
96
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080097impl AsRef<Tube> for TaggedControlTube {
98 fn as_ref(&self) -> &Tube {
99 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700100 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800101 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700102 }
103 }
104}
105
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800106impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700107 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800108 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700109 }
110}
111
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When set, the jail is configured with an empty capability mask.
    limit_caps: bool,
    /// When set, seccomp failures are logged and the `.policy` file is used
    /// even if a pre-compiled `.bpf` file exists.
    log_failures: bool,
    /// Path to the seccomp policy; its extension is replaced with `bpf` or
    /// `policy` when the filter is loaded.
    seccomp_policy: &'a Path,
    /// Optional uid mapping applied to the jail.
    uid_map: Option<&'a str>,
    /// Optional gid mapping applied to the jail.
    gid_map: Option<&'a str>,
}
119
Zach Reizner44863792019-06-26 14:22:08 -0700120fn create_base_minijail(
121 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800122 r_limit: Option<u64>,
123 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700124) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800125 // All child jails run in a new user namespace without any users mapped,
126 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800127 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800128
129 if let Some(config) = config {
130 j.namespace_pids();
131 j.namespace_user();
132 j.namespace_user_disable_setgroups();
133 if config.limit_caps {
134 // Don't need any capabilities.
135 j.use_caps(0);
136 }
137 if let Some(uid_map) = config.uid_map {
138 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
139 }
140 if let Some(gid_map) = config.gid_map {
141 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
142 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900143 // Run in a new mount namespace.
144 j.namespace_vfs();
145
Matt Delcoc24ad782020-02-14 13:24:36 -0800146 // Run in an empty network namespace.
147 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900148
149 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800150 j.no_new_privs();
151
152 // By default we'll prioritize using the pre-compiled .bpf over the .policy
153 // file (the .bpf is expected to be compiled using "trap" as the failure
154 // behavior instead of the default "kill" behavior).
155 // Refer to the code comment for the "seccomp-log-failures"
156 // command-line parameter for an explanation about why the |log_failures|
157 // flag forces the use of .policy files (and the build-time alternative to
158 // this run-time flag).
159 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
160 if bpf_policy_file.exists() && !config.log_failures {
161 j.parse_seccomp_program(&bpf_policy_file)
162 .map_err(Error::DeviceJail)?;
163 } else {
164 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
165 // which will correctly kill the entire device process if a worker
166 // thread commits a seccomp violation.
167 j.set_seccomp_filter_tsync();
168 if config.log_failures {
169 j.log_seccomp_filter_failures();
170 }
171 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
172 .map_err(Error::DeviceJail)?;
173 }
174 j.use_seccomp_filter();
175 // Don't do init setup.
176 j.run_as_init();
177 }
178
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900179 // Only pivot_root if we are not re-using the current root directory.
180 if root != Path::new("/") {
181 // It's safe to call `namespace_vfs` multiple times.
182 j.namespace_vfs();
183 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
184 }
Matt Delco45caf912019-11-13 08:11:09 -0800185
Matt Delcoc24ad782020-02-14 13:24:36 -0800186 // Most devices don't need to open many fds.
187 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
188 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
189 .map_err(Error::SettingMaxOpenFiles)?;
190
Zach Reizner39aa26b2017-12-12 18:03:23 -0800191 Ok(j)
192}
193
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800194fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700195 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800196 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
197 // A directory for a jailed device's pivot root.
198 let root_path = Path::new(pivot_root);
199 if !root_path.exists() {
200 return Err(Error::PivotRootDoesntExist(pivot_root));
201 }
202 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800203 let config = SandboxConfig {
204 limit_caps: true,
205 log_failures: cfg.seccomp_log_failures,
206 seccomp_policy: &policy_path,
207 uid_map: None,
208 gid_map: None,
209 };
210 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800211 } else {
212 Ok(None)
213 }
214}
215
David Tolnayfd0971d2019-03-04 17:15:57 -0800216type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800217
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000218fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900219 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Andrew Walbranbc55e302021-07-13 17:35:10 +0100220 .map_err(|e| Error::Disk(disk.path.clone(), e.into()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800221 // Lock the disk image to prevent other crosvm instances from using it.
222 let lock_op = if disk.read_only {
223 FlockOperation::LockShared
224 } else {
225 FlockOperation::LockExclusive
226 };
227 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
228
Junichi Uekawa52437db2021-09-29 17:33:07 +0900229 info!("Trying to attach block device: {}", disk.path.display());
230 let dev = if disk::async_ok(&raw_image).map_err(Error::CreateDiskCheckAsyncOkError)? {
231 let async_file =
232 disk::create_async_disk_file(raw_image).map_err(Error::CreateAsyncDiskError)?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700233 Box::new(
234 virtio::BlockAsync::new(
235 virtio::base_features(cfg.protected_vm),
236 async_file,
237 disk.read_only,
238 disk.sparse,
239 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700240 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800241 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700242 )
243 .map_err(Error::BlockDeviceNew)?,
244 ) as Box<dyn VirtioDevice>
245 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700246 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
247 .map_err(Error::CreateDiskError)?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700248 Box::new(
249 virtio::Block::new(
250 virtio::base_features(cfg.protected_vm),
251 disk_file,
252 disk.read_only,
253 disk.sparse,
254 disk.block_size,
255 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800256 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700257 )
258 .map_err(Error::BlockDeviceNew)?,
259 ) as Box<dyn VirtioDevice>
260 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800261
262 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700263 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700264 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800265 })
266}
267
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900268fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
269 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
270 .map_err(Error::VhostUserBlockDeviceNew)?;
271
272 Ok(VirtioDeviceStub {
273 dev: Box::new(dev),
274 // no sandbox here because virtqueue handling is exported to a different process.
275 jail: None,
276 })
277}
278
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900279fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
280 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
281 .map_err(Error::VhostUserConsoleDeviceNew)?;
282
283 Ok(VirtioDeviceStub {
284 dev: Box::new(dev),
285 // no sandbox here because virtqueue handling is exported to a different process.
286 jail: None,
287 })
288}
289
Woody Chow5890b702021-02-12 14:57:02 +0900290fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
291 let dev = VhostUserFs::new(
292 virtio::base_features(cfg.protected_vm),
293 &option.socket,
294 &option.tag,
295 )
296 .map_err(Error::VhostUserFsDeviceNew)?;
297
298 Ok(VirtioDeviceStub {
299 dev: Box::new(dev),
300 // no sandbox here because virtqueue handling is exported to a different process.
301 jail: None,
302 })
303}
304
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900305fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
306 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
307 .map_err(Error::VhostUserMac80211HwsimNew)?;
308
309 Ok(VirtioDeviceStub {
310 dev: Box::new(dev),
311 // no sandbox here because virtqueue handling is exported to a different process.
312 jail: None,
313 })
314}
315
David Tolnay2b089fc2019-03-04 15:33:22 -0800316fn create_rng_device(cfg: &Config) -> DeviceResult {
Keiichi Watanabef70350b2020-11-24 21:57:53 +0900317 let dev =
318 virtio::Rng::new(virtio::base_features(cfg.protected_vm)).map_err(Error::RngDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800319
320 Ok(VirtioDeviceStub {
321 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700322 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800323 })
324}
325
/// Creates the CRAS-backed virtio sound device from `cras_snd`.
///
/// When sandboxing is enabled, the `cras_snd_device` jail additionally gets a
/// small tmpfs root, a writable bind mount of `/run/cras`, and the current
/// user added to it.
#[cfg(feature = "audio_cras")]
fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
    let dev = virtio::snd::cras_backend::VirtioSndCras::new(
        virtio::base_features(cfg.protected_vm),
        cras_snd,
    )
    .map_err(Error::CrasSoundDeviceNew)?;

    // `cfg` is already a reference; pass it through as the other device
    // constructors in this file do.
    let mut jail = simple_jail(cfg, "cras_snd_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory for cras_snd_device.
        // The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let run_cras_path = Path::new("/run/cras");
        jail.mount_bind(run_cras_path, run_cras_path, true)?;

        add_current_user_to_jail(jail)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
361
/// Creates the virtio TPM device and the storage directory it uses.
///
/// With sandboxing enabled, the `tpm_device` jail gets a small tmpfs root and
/// the current user added to it. A per-process directory `/run/vm/tpm.<pid>`
/// is created, chowned to the ids returned by `add_current_user_to_jail`, and
/// bind-mounted writable into the jail. Without a jail, storage is
/// `/tmp/tpm-simulator`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = tpm_jail.as_mut() {
        // Create a tmpfs in the device's root directory for tpm
        // simulator storage. The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let crosvm_ids = add_current_user_to_jail(jail)?;

        let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
        let storage = PathBuf::from(&tpm_pid_dir);
        fs::create_dir_all(&storage)
            .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;
        let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
        chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
            .map_err(Error::ChownTpmStorage)?;

        jail.mount_bind(&storage, &storage, true)?;
        storage
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
409
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700410fn create_single_touch_device(
411 cfg: &Config,
412 single_touch_spec: &TouchDeviceOption,
413 idx: u32,
414) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800415 let socket = single_touch_spec
416 .get_path()
417 .into_unix_stream()
418 .map_err(|e| {
419 error!("failed configuring virtio single touch: {:?}", e);
420 e
421 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800422
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800423 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700424 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700425 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700426 socket,
427 width,
428 height,
429 virtio::base_features(cfg.protected_vm),
430 )
431 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800432 Ok(VirtioDeviceStub {
433 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700434 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800435 })
436}
437
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700438fn create_multi_touch_device(
439 cfg: &Config,
440 multi_touch_spec: &TouchDeviceOption,
441 idx: u32,
442) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000443 let socket = multi_touch_spec
444 .get_path()
445 .into_unix_stream()
446 .map_err(|e| {
447 error!("failed configuring virtio multi touch: {:?}", e);
448 e
449 })?;
450
451 let (width, height) = multi_touch_spec.get_size();
452 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700453 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000454 socket,
455 width,
456 height,
457 virtio::base_features(cfg.protected_vm),
458 )
459 .map_err(Error::InputDeviceNew)?;
460
461 Ok(VirtioDeviceStub {
462 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700463 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000464 })
465}
466
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700467fn create_trackpad_device(
468 cfg: &Config,
469 trackpad_spec: &TouchDeviceOption,
470 idx: u32,
471) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800472 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800473 error!("failed configuring virtio trackpad: {}", e);
474 e
475 })?;
476
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800477 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700478 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700479 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700480 socket,
481 width,
482 height,
483 virtio::base_features(cfg.protected_vm),
484 )
485 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800486
487 Ok(VirtioDeviceStub {
488 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700489 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800490 })
491}
492
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700493fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800494 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800495 error!("failed configuring virtio mouse: {}", e);
496 e
497 })?;
498
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700499 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Noah Goldd4ca29b2020-10-27 12:21:52 -0700500 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800501
502 Ok(VirtioDeviceStub {
503 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700504 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800505 })
506}
507
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700508fn create_keyboard_device<T: IntoUnixStream>(
509 cfg: &Config,
510 keyboard_socket: T,
511 idx: u32,
512) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800513 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800514 error!("failed configuring virtio keyboard: {}", e);
515 e
516 })?;
517
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700518 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Noah Goldd4ca29b2020-10-27 12:21:52 -0700519 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800520
521 Ok(VirtioDeviceStub {
522 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700523 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800524 })
525}
526
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700527fn create_switches_device<T: IntoUnixStream>(
528 cfg: &Config,
529 switches_socket: T,
530 idx: u32,
531) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800532 let socket = switches_socket.into_unix_stream().map_err(|e| {
533 error!("failed configuring virtio switches: {}", e);
534 e
535 })?;
536
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700537 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Norman5e23df72021-03-11 10:11:02 -0800538 .map_err(Error::InputDeviceNew)?;
539
540 Ok(VirtioDeviceStub {
541 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700542 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800543 })
544}
545
David Tolnay2b089fc2019-03-04 15:33:22 -0800546fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
547 let dev_file = OpenOptions::new()
548 .read(true)
549 .write(true)
550 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800551 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800552
Noah Goldd4ca29b2020-10-27 12:21:52 -0700553 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
554 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800555
556 Ok(VirtioDeviceStub {
557 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700558 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800559 })
560}
561
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800562fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
563 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100564 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800565
566 Ok(VirtioDeviceStub {
567 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700568 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800569 })
570}
571
Michael Hoylea596a072020-11-10 19:32:45 -0800572fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800573 // Safe because we ensure that we get a unique handle to the fd.
574 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800575 Tap::from_raw_descriptor(
576 validate_raw_descriptor(tap_fd).map_err(Error::ValidateRawDescriptor)?,
577 )
578 .map_err(Error::CreateTapDevice)?
David Tolnay2b089fc2019-03-04 15:33:22 -0800579 };
580
Xiong Zhang773c7072020-03-20 10:39:55 +0800581 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
582 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700583 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800584 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
585 vq_pairs = 1;
586 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100587 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100588 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800589
590 Ok(VirtioDeviceStub {
591 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700592 jail: simple_jail(cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800593 })
594}
595
596fn create_net_device(
597 cfg: &Config,
598 host_ip: Ipv4Addr,
599 netmask: Ipv4Addr,
600 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800601) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800602 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
603 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700604 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800605 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
606 vq_pairs = 1;
607 }
608
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100609 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800610 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100611 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100612 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100613 features,
614 host_ip,
615 netmask,
616 mac_address,
Will Deacon81d5adb2020-10-06 18:37:48 +0100617 )
618 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800619 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800620 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100621 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800622 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800623 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800624 };
625
626 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800627 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800628 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800629 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800630 };
631
632 Ok(VirtioDeviceStub {
633 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700634 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800635 })
636}
637
Keiichi Watanabe60686582021-03-12 04:53:51 +0900638fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
639 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
640 .map_err(Error::VhostUserNetDeviceNew)?;
641
642 Ok(VirtioDeviceStub {
643 dev: Box::new(dev),
644 // no sandbox here because virtqueue handling is exported to a different process.
645 jail: None,
646 })
647}
648
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900649fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
650 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
651 // connection.
652 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
653 .map_err(Error::VhostUserWlDeviceNew)?;
654
655 Ok(VirtioDeviceStub {
656 dev: Box::new(dev),
657 // no sandbox here because virtqueue handling is exported to a different process.
658 jail: None,
659 })
660}
661
/// Creates a vhost-user gpu device connected to `opt.socket`; `host_tube` and
/// `device_tube` are handed over to the device.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .map_err(Error::VhostUserGpuDeviceNew)?;

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        // no sandbox here because virtqueue handling is exported to a different process.
        jail: None,
    })
}
685
/// Builds the virtio GPU device and, when `simple_jail` yields one, the jail it runs in.
///
/// The display backend list always contains the X and stub backends; a wayland backend is
/// inserted at the front when `wayland_socket_path` is given.
///
/// Returns `Error::InvalidWaylandPath` if any configured wayland socket path has no parent
/// directory.
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    // Directories holding the configured wayland sockets; they are bind mounted into the jail
    // further down.
    let mut wayland_socket_dirs = Vec::new();
    for (_name, path) in cfg.wayland_socket_paths.iter() {
        wayland_socket_dirs.push(path.parent().ok_or(Error::InvalidWaylandPath)?);
    }

    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];
    if let Some(socket_path) = wayland_socket_path {
        let wayland_backend = virtio::DisplayBackend::Wayland(Some(socket_path.to_owned()));
        display_backends.insert(0, wayland_backend);
    }

    let gpu_exit_evt = exit_evt.try_clone().map_err(Error::CloneEvent)?;
    let dev = virtio::Gpu::new(
        gpu_exit_evt,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = if let Some(mut jail) = simple_jail(cfg, "gpu_device")? {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        let sys_dev_char = Path::new("/sys/dev/char");
        jail.mount_bind(sys_dev_char, sys_dev_char, false)?;
        let sys_devices = Path::new("/sys/devices");
        jail.mount_bind(sys_devices, sys_devices, false)?;

        let dri_dir = Path::new("/dev/dri");
        if dri_dir.exists() {
            jail.mount_bind(dri_dir, dri_dir, false)?;
        }

        // Prepare GPU shader disk cache directory.
        let gpu_params = cfg.gpu_parameters.as_ref();
        if let Some(cache_dir) = gpu_params.and_then(|params| params.cache_path.as_ref()) {
            let caching_unsupported =
                cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox;
            if caching_unsupported {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) =
                    gpu_params.and_then(|params| params.cache_size.as_ref())
                {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_dir = Path::new(cache_dir);
                jail.mount_bind(shadercache_dir, shadercache_dir, true)?;
            }
        }

        // Optional host device nodes, bind mounted writable only when present: the ARM
        // specific devices (mali0, pvr_sync) and the udmabuf driver.
        for node in &["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
            let node_path = Path::new(node);
            if node_path.exists() {
                jail.mount_bind(node_path, node_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        let lib_dirs = &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/glvnd",
            "/usr/share/vulkan",
        ];
        for lib_dir in lib_dirs {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                jail.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket's directory into jail's root. This is necessary since
        // each new wayland context must open() the socket. If the wayland socket is ever
        // destroyed and remade in the same host directory, new connections will be possible
        // without restarting the wayland device.
        for dir in &wayland_socket_dirs {
            jail.mount_bind(dir, dir, true)?;
        }

        add_current_user_to_jail(&mut jail)?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_dir = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        jail.mount(proc_dir, proc_dir, "proc", proc_flags)?;

        // To enable perfetto tracing, we need to give access to the perfetto service IPC
        // endpoints.
        let perfetto_dir = Path::new("/run/perfetto");
        if perfetto_dir.exists() {
            jail.mount_bind(perfetto_dir, perfetto_dir, true)?;
        }

        Some(jail)
    } else {
        None
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
845
846fn create_wayland_device(
847 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800848 control_tube: Tube,
849 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800850) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900851 let wayland_socket_dirs = cfg
852 .wayland_socket_paths
853 .iter()
854 .map(|(_name, path)| path.parent())
855 .collect::<Option<Vec<_>>>()
856 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800857
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100858 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100859 let dev = virtio::Wl::new(
860 features,
861 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800862 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +0100863 resource_bridge,
864 )
865 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800866
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700867 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800868 Some(mut jail) => {
869 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
870 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
871 jail.mount_with_data(
872 Path::new("none"),
873 Path::new("/"),
874 "tmpfs",
875 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
876 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800877 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800878
879 // Bind mount the wayland socket's directory into jail's root. This is necessary since
880 // each new wayland context must open() the socket. If the wayland socket is ever
881 // destroyed and remade in the same host directory, new connections will be possible
882 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900883 for dir in &wayland_socket_dirs {
884 jail.mount_bind(dir, dir, true)?;
885 }
Fergus Dall51200512021-08-19 12:54:26 +1000886 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800887
888 Some(jail)
889 }
890 None => None,
891 };
892
893 Ok(VirtioDeviceStub {
894 dev: Box::new(dev),
895 jail,
896 })
897}
898
/// Builds a virtio video device of kind `typ` and, when `simple_jail` yields one, the jail it
/// runs in.
///
/// `resource_bridge` is passed to `VideoDevice::new` wrapped in `Some`.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let jail = match simple_jail(cfg, "video_device")? {
        Some(mut jail) => {
            // Decoder and encoder need the same user setup, so no dispatch on `typ` is needed
            // here (the previous `match typ` had two identical arms).
            add_current_user_to_jail(&mut jail)?;

            // Create a tmpfs in the device's root directory so that we can bind mount files.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=67108864",
            )?;

            // Render node for libvda.
            let dev_dri_path = Path::new("/dev/dri/renderD128");
            jail.mount_bind(dev_dri_path, dev_dri_path, false)?;

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                // Device nodes used by libdrm through minigbm in libvda on AMD devices.
                let sys_dev_char_path = Path::new("/sys/dev/char");
                jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
                let sys_devices_path = Path::new("/sys/devices");
                jail.mount_bind(sys_devices_path, sys_devices_path, false)?;

                // Required for loading dri libraries loaded by minigbm on AMD devices.
                let lib_dir = Path::new("/usr/lib64");
                jail.mount_bind(lib_dir, lib_dir, false)?;
            }

            // Device nodes required by libchrome which establishes Mojo connection in libvda.
            let dev_urandom_path = Path::new("/dev/urandom");
            jail.mount_bind(dev_urandom_path, dev_urandom_path, false)?;
            let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
            jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(devices::virtio::VideoDevice::new(
            virtio::base_features(cfg.protected_vm),
            typ,
            Some(resource_bridge),
        )),
        jail,
    })
}
958
/// Creates a video device of kind `typ` and appends it to `devs`.
///
/// Any error from `create_video_device` is returned unchanged, in which case `devs` is left
/// untouched.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let video_stub = create_video_device(cfg, typ, video_tube)?;
    devs.push(video_stub);
    Ok(())
}
969
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +0900970fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100971 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +0900972 let dev = virtio::vhost::Vsock::new(&cfg.vhost_vsock_device_path, features, cid)
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100973 .map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800974
975 Ok(VirtioDeviceStub {
976 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700977 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800978 })
979}
980
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900981fn create_fs_device(
982 cfg: &Config,
983 uid_map: &str,
984 gid_map: &str,
985 src: &Path,
986 tag: &str,
987 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800988 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900989) -> DeviceResult {
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +0900990 let max_open_files = base::get_max_open_files().map_err(Error::GetMaxOpenFiles)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800991 let j = if cfg.sandbox {
992 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
993 let config = SandboxConfig {
994 limit_caps: false,
995 uid_map: Some(uid_map),
996 gid_map: Some(gid_map),
997 log_failures: cfg.seccomp_log_failures,
998 seccomp_policy: &seccomp_policy,
999 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001000 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1001 // We want bind mounts from the parent namespaces to propagate into the fs device's
1002 // namespace.
1003 jail.set_remount_mode(libc::MS_SLAVE);
1004
1005 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001006 } else {
1007 create_base_minijail(src, Some(max_open_files), None)?
1008 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001009
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001010 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001011 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1012 // when num_queues > 1.
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001013 let dev =
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001014 virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001015
1016 Ok(VirtioDeviceStub {
1017 dev: Box::new(dev),
1018 jail: Some(j),
1019 })
1020}
1021
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001022fn create_9p_device(
1023 cfg: &Config,
1024 uid_map: &str,
1025 gid_map: &str,
1026 src: &Path,
1027 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001028 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001029) -> DeviceResult {
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +09001030 let max_open_files = base::get_max_open_files().map_err(Error::GetMaxOpenFiles)?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001031 let (jail, root) = if cfg.sandbox {
1032 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1033 let config = SandboxConfig {
1034 limit_caps: false,
1035 uid_map: Some(uid_map),
1036 gid_map: Some(gid_map),
1037 log_failures: cfg.seccomp_log_failures,
1038 seccomp_policy: &seccomp_policy,
1039 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001040
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001041 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1042 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1043 // namespace.
1044 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001045
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001046 // The shared directory becomes the root of the device's file system.
1047 let root = Path::new("/");
1048 (Some(jail), root)
1049 } else {
1050 // There's no mount namespace so we tell the server to treat the source directory as the
1051 // root.
1052 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001053 };
1054
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001055 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001056 p9_cfg.root = root.into();
1057 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001058
1059 Ok(VirtioDeviceStub {
1060 dev: Box::new(dev),
1061 jail,
1062 })
1063}
1064
Jakub Starona3411ea2019-04-24 10:55:25 -07001065fn create_pmem_device(
1066 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001067 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001068 resources: &mut SystemAllocator,
1069 disk: &DiskOption,
1070 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001071 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001072) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001073 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Andrew Walbranbc55e302021-07-13 17:35:10 +01001074 .map_err(|e| Error::Disk(disk.path.clone(), e.into()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001075
1076 let (disk_size, arena_size) = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001077 let metadata =
1078 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001079 let disk_len = metadata.len();
1080 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1081 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1082 // we just align the size of the file to 2 MiB then access beyond the last page of the
1083 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1084 // padding up to 2 MiB.
1085 let alignment = 2 * 1024 * 1024;
1086 let align_adjust = if disk_len % alignment != 0 {
1087 alignment - (disk_len % alignment)
1088 } else {
1089 0
1090 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001091 (
1092 disk_len,
1093 disk_len
1094 .checked_add(align_adjust)
1095 .ok_or(Error::PmemDeviceImageTooBig)?,
1096 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001097 };
1098
1099 let protection = {
1100 if disk.read_only {
1101 Protection::read()
1102 } else {
1103 Protection::read_write()
1104 }
1105 };
1106
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001107 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001108 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001109 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001110 let disk_size = usize::try_from(disk_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001111
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001112 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1113 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001114 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001115 .map_err(Error::ReservePmemMemory)?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001116
1117 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1118 // of the page with zeroes. However, the anonymous mapping added below must start on a
1119 // page boundary, so round up the size before calculating the offset of the anon region.
1120 let disk_size = round_up_to_page_size(disk_size);
1121
1122 if arena_size > disk_size {
1123 // Add an anonymous region with the same protection as the disk mapping if the arena
1124 // size was aligned.
1125 arena
1126 .add_anon_protection(disk_size, arena_size - disk_size, protection)
1127 .map_err(Error::ReservePmemMemory)?;
1128 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001129 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001130 };
1131
1132 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001133 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001134 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001135 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001136 Alloc::PmemDevice(index),
1137 format!("pmem_disk_image_{}", index),
1138 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1139 128 * 1024 * 1024, /* 128 MiB */
1140 )
1141 .map_err(Error::AllocatePmemDeviceAddress)?;
1142
Daniel Verkampe1980a92020-02-07 11:00:55 -08001143 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001144 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001145 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001146 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001147 /* read_only = */ disk.read_only,
1148 /* log_dirty_pages = */ false,
1149 )
1150 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001151
Daniel Verkampe1980a92020-02-07 11:00:55 -08001152 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001153 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001154 fd,
1155 GuestAddress(mapping_address),
1156 slot,
1157 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001158 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001159 )
1160 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001161
1162 Ok(VirtioDeviceStub {
1163 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001164 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001165 })
1166}
1167
Zide Chendfc4b882021-03-10 16:35:37 -08001168fn create_iommu_device(
1169 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001170 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001171 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1172) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001173 let dev = virtio::Iommu::new(
1174 virtio::base_features(cfg.protected_vm),
1175 endpoints,
1176 phys_max_addr,
1177 )
1178 .map_err(Error::CreateVirtioIommu)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001179
1180 Ok(VirtioDeviceStub {
1181 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001182 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001183 })
1184}
1185
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001186fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001187 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001188 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001189 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001190 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001191 .map_err(Error::CreateConsole)?;
1192
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001193 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001194 Some(mut jail) => {
1195 // Create a tmpfs in the device's root directory so that we can bind mount the
1196 // log socket directory into it.
1197 // The size=67108864 is size=64*1024*1024 or size=64MB.
1198 jail.mount_with_data(
1199 Path::new("none"),
1200 Path::new("/"),
1201 "tmpfs",
1202 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1203 "size=67108864",
1204 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001205 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001206 let res = param.add_bind_mounts(&mut jail);
1207 if res.is_err() {
1208 error!("failed to add bind mounts for console device");
1209 }
1210 Some(jail)
1211 }
1212 None => None,
1213 };
1214
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001215 Ok(VirtioDeviceStub {
1216 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001217 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001218 })
1219}
1220
/// Builds a virtio sound device from `path`, jailed with the "vios_audio_device" policy when
/// `simple_jail` yields a jail.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).map_err(Error::SoundDeviceNew)?;

    // The jail is set up only after the device has been created successfully.
    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1231
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001232// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001233#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001234fn create_virtio_devices(
1235 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001236 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001237 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001238 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001239 wayland_device_tube: Tube,
1240 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001241 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001242 balloon_device_tube: Tube,
1243 disk_device_tubes: &mut Vec<Tube>,
1244 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001245 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001246 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001247) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001248 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001249
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001250 for (_, param) in cfg
1251 .serial_parameters
1252 .iter()
1253 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1254 {
1255 let dev = create_console_device(cfg, param)?;
1256 devs.push(dev);
1257 }
1258
Zach Reizner8fb52112017-12-13 16:04:39 -08001259 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001260 let disk_device_tube = disk_device_tubes.remove(0);
1261 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001262 }
1263
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001264 for blk in &cfg.vhost_user_blk {
1265 devs.push(create_vhost_user_block_device(cfg, blk)?);
1266 }
1267
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +09001268 for console in &cfg.vhost_user_console {
1269 devs.push(create_vhost_user_console_device(cfg, console)?);
1270 }
1271
Jakub Starona3411ea2019-04-24 10:55:25 -07001272 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001273 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001274 devs.push(create_pmem_device(
1275 cfg,
1276 vm,
1277 resources,
1278 pmem_disk,
1279 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001280 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001281 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001282 }
1283
David Tolnay2b089fc2019-03-04 15:33:22 -08001284 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001285
Woody Chow737ff122021-03-22 17:49:57 +09001286 #[cfg(feature = "audio_cras")]
1287 {
Woody Chow0b2b6062021-09-03 15:40:02 +09001288 if let Some(cras_snd) = &cfg.cras_snd {
1289 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
Woody Chow737ff122021-03-22 17:49:57 +09001290 }
1291 }
1292
David Tolnayde6b29a2018-12-20 11:49:46 -08001293 #[cfg(feature = "tpm")]
1294 {
David Tolnay43f8e212019-02-13 17:28:16 -08001295 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001296 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001297 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001298 }
1299
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001300 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
1301 devs.push(create_single_touch_device(
1302 cfg,
1303 single_touch_spec,
1304 idx as u32,
1305 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001306 }
1307
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001308 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
1309 devs.push(create_multi_touch_device(
1310 cfg,
1311 multi_touch_spec,
1312 idx as u32,
1313 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001314 }
1315
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001316 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
1317 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001318 }
1319
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001320 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
1321 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001322 }
1323
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001324 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
1325 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001326 }
1327
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001328 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
1329 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -08001330 }
1331
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001332 for dev_path in &cfg.virtio_input_evdevs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001333 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001334 }
1335
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001336 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001337
Zach Reizner39aa26b2017-12-12 18:03:23 -08001338 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001339 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001340 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001341 }
1342
David Tolnay2b089fc2019-03-04 15:33:22 -08001343 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1344 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1345 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001346 if !cfg.vhost_user_net.is_empty() {
1347 return Err(Error::VhostUserNetWithNetArgs);
1348 }
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001349 devs.push(create_net_device(cfg, host_ip, netmask, mac_address)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001350 }
1351
Keiichi Watanabe60686582021-03-12 04:53:51 +09001352 for net in &cfg.vhost_user_net {
1353 devs.push(create_vhost_user_net_device(cfg, net)?);
1354 }
1355
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001356 for opt in &cfg.vhost_user_wl {
1357 devs.push(create_vhost_user_wl_device(cfg, opt)?);
1358 }
1359
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001360 #[cfg(feature = "gpu")]
1361 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
1362 devs.push(create_vhost_user_gpu_device(
1363 cfg,
1364 opt,
1365 host_tube,
1366 device_tube,
1367 )?);
1368 }
1369
David Tolnayfa701712019-02-13 16:42:54 -08001370 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001371 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001372
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001373 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001374 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001375 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001376
1377 #[cfg(feature = "gpu")]
1378 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001379 if cfg.gpu_parameters.is_some() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001380 let (wl_socket, gpu_socket) = Tube::pair().map_err(Error::CreateTube)?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001381 resource_bridges.push(gpu_socket);
1382 wl_resource_bridge = Some(wl_socket);
1383 }
1384 }
1385
1386 devs.push(create_wayland_device(
1387 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001388 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001389 wl_resource_bridge,
1390 )?);
1391 }
David Tolnayfa701712019-02-13 16:42:54 -08001392
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001393 #[cfg(feature = "video-decoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001394 let video_dec_tube = if cfg.video_dec {
1395 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1396 resource_bridges.push(gpu_tube);
1397 Some(video_tube)
1398 } else {
1399 None
1400 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001401
1402 #[cfg(feature = "video-encoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001403 let video_enc_tube = if cfg.video_enc {
1404 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1405 resource_bridges.push(gpu_tube);
1406 Some(video_tube)
1407 } else {
1408 None
1409 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001410
Zach Reizner3a8100a2017-09-13 19:15:43 -07001411 #[cfg(feature = "gpu")]
1412 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001413 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Jason Macnakd659a0d2021-03-15 15:33:01 -07001414 let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
1415 let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
1416 if !gpu_parameters.displays.is_empty() {
1417 gpu_display_w = gpu_parameters.displays[0].width;
1418 gpu_display_h = gpu_parameters.displays[0].height;
1419 }
1420
Zach Reizner65b98f12019-11-22 17:34:58 -08001421 let mut event_devices = Vec::new();
1422 if cfg.display_window_mouse {
1423 let (event_device_socket, virtio_dev_socket) =
1424 UnixStream::pair().map_err(Error::CreateSocket)?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001425 let (multi_touch_width, multi_touch_height) = cfg
1426 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001427 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001428 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001429 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -07001430 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001431 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001432 // u32::MAX is the least likely to collide with the indices generated above for
1433 // the multi_touch options, which begin at 0.
1434 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001435 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001436 multi_touch_width,
1437 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001438 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001439 )
1440 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001441 devs.push(VirtioDeviceStub {
1442 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001443 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001444 });
1445 event_devices.push(EventDevice::touchscreen(event_device_socket));
1446 }
1447 if cfg.display_window_keyboard {
1448 let (event_device_socket, virtio_dev_socket) =
1449 UnixStream::pair().map_err(Error::CreateSocket)?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001450 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001451 // u32::MAX is the least likely to collide with the indices generated above for
1452 // the multi_touch options, which begin at 0.
1453 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001454 virtio_dev_socket,
1455 virtio::base_features(cfg.protected_vm),
1456 )
1457 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001458 devs.push(VirtioDeviceStub {
1459 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001460 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001461 });
1462 event_devices.push(EventDevice::keyboard(event_device_socket));
1463 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001464 devs.push(create_gpu_device(
1465 cfg,
1466 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001467 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001468 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001469 // Use the unnamed socket for GPU display screens.
1470 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001471 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001472 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001473 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001474 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001475 }
1476 }
1477
Daniel Verkampffb59122021-03-18 14:06:15 -07001478 #[cfg(feature = "video-decoder")]
1479 {
1480 if let Some(video_dec_tube) = video_dec_tube {
1481 register_video_device(
1482 &mut devs,
1483 video_dec_tube,
1484 cfg,
1485 devices::virtio::VideoDeviceType::Decoder,
1486 )?;
1487 }
1488 }
1489
1490 #[cfg(feature = "video-encoder")]
1491 {
1492 if let Some(video_enc_tube) = video_enc_tube {
1493 register_video_device(
1494 &mut devs,
1495 video_enc_tube,
1496 cfg,
1497 devices::virtio::VideoDeviceType::Encoder,
1498 )?;
1499 }
1500 }
1501
Zach Reizneraa575662018-08-15 10:46:32 -07001502 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001503 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001504 }
1505
Woody Chow5890b702021-02-12 14:57:02 +09001506 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001507 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +09001508 }
1509
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001510 for shared_dir in &cfg.shared_dirs {
1511 let SharedDir {
1512 src,
1513 tag,
1514 kind,
1515 uid_map,
1516 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001517 fs_cfg,
1518 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001519 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001520
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001521 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001522 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001523 let device_tube = fs_device_tubes.remove(0);
1524 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001525 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001526 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001527 };
1528 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001529 }
1530
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001531 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
1532 devs.push(create_vhost_user_mac80211_hwsim_device(
1533 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001534 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001535 )?);
1536 }
1537
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001538 #[cfg(feature = "audio")]
1539 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001540 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001541 }
1542
David Tolnay2b089fc2019-03-04 15:33:22 -08001543 Ok(devs)
1544}
1545
Xiong Zhang10f15052021-04-08 17:23:33 +08001546fn create_vfio_device(
1547 cfg: &Config,
1548 vm: &impl Vm,
1549 resources: &mut SystemAllocator,
1550 control_tubes: &mut Vec<TaggedControlTube>,
1551 vfio_path: &Path,
Xiong Zhange19ab752021-05-20 18:18:46 +08001552 hotplug: bool,
Zide Chendfc4b882021-03-10 16:35:37 -08001553 endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1554 iommu_enabled: bool,
Xiong Zhang10f15052021-04-08 17:23:33 +08001555) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Zide Chendfc4b882021-03-10 16:35:37 -08001556 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
1557 .map_err(Error::CreateVfioDevice)?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001558
1559 // create MSI, MSI-X, and Mem request sockets for each vfio device
1560 let (vfio_host_tube_msi, vfio_device_tube_msi) = Tube::pair().map_err(Error::CreateTube)?;
1561 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1562
1563 let (vfio_host_tube_msix, vfio_device_tube_msix) = Tube::pair().map_err(Error::CreateTube)?;
1564 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1565
1566 let (vfio_host_tube_mem, vfio_device_tube_mem) = Tube::pair().map_err(Error::CreateTube)?;
1567 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1568
Xiong Zhange19ab752021-05-20 18:18:46 +08001569 // put hotplug vfio device on Bus#1 temporary
1570 let bus_num = if hotplug { Some(1) } else { None };
Xiong Zhangdea7dbb2021-07-26 14:49:03 +08001571 let vfio_device = VfioDevice::new(vfio_path, vm, vfio_container.clone(), iommu_enabled)
1572 .map_err(Error::CreateVfioDevice)?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001573 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1574 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001575 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001576 vfio_device_tube_msi,
1577 vfio_device_tube_msix,
1578 vfio_device_tube_mem,
1579 ));
1580 // early reservation for pass-through PCI devices.
Zide Chendfc4b882021-03-10 16:35:37 -08001581 let endpoint_addr = vfio_pci_device.allocate_address(resources);
1582 if endpoint_addr.is_err() {
Xiong Zhang10f15052021-04-08 17:23:33 +08001583 warn!(
1584 "address reservation failed for vfio {}",
1585 vfio_pci_device.debug_label()
1586 );
1587 }
1588
Zide Chendfc4b882021-03-10 16:35:37 -08001589 if iommu_enabled {
1590 endpoints.insert(endpoint_addr.unwrap().to_u32(), vfio_container);
1591 }
1592
Xiong Zhang10f15052021-04-08 17:23:33 +08001593 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1594}
1595
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001596fn create_vfio_platform_device(
1597 cfg: &Config,
1598 vm: &impl Vm,
1599 _resources: &mut SystemAllocator,
1600 control_tubes: &mut Vec<TaggedControlTube>,
1601 vfio_path: &Path,
1602 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1603 iommu_enabled: bool,
1604) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
1605 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
1606 .map_err(Error::CreateVfioDevice)?;
1607
1608 let (vfio_host_tube_mem, vfio_device_tube_mem) = Tube::pair().map_err(Error::CreateTube)?;
1609 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1610
1611 let vfio_device = VfioDevice::new(vfio_path, vm, vfio_container, iommu_enabled)
1612 .map_err(Error::CreateVfioDevice)?;
1613 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1614
1615 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1616}
1617
David Tolnay2b089fc2019-03-04 15:33:22 -08001618fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001619 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001620 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001621 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001622 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001623 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001624 control_tubes: &mut Vec<TaggedControlTube>,
1625 wayland_device_tube: Tube,
1626 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001627 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001628 balloon_device_tube: Tube,
1629 disk_device_tubes: &mut Vec<Tube>,
1630 pmem_device_tubes: &mut Vec<Tube>,
1631 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001632 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001633 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001634) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001635 let stubs = create_virtio_devices(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001636 cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001637 vm,
1638 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001639 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001640 wayland_device_tube,
1641 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001642 vhost_user_gpu_tubes,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001643 balloon_device_tube,
1644 disk_device_tubes,
1645 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001646 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001647 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001648 )?;
1649
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001650 let mut devices = Vec::new();
David Tolnay2b089fc2019-03-04 15:33:22 -08001651
1652 for stub in stubs {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001653 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1654 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001655 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkampbb712d62019-11-19 09:47:33 -08001656 .map_err(Error::VirtioPciDev)?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001657 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
1658 devices.push((dev, stub.jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001659 }
1660
Andrew Scull1590e6f2020-03-18 18:00:47 +00001661 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001662 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001663 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
1664 .map_err(Error::CreateAc97)?;
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001665 let jail = simple_jail(cfg, dev.minijail_policy())?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001666 devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001667 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001668
Daniel Verkampf1439d42021-05-21 13:55:10 -07001669 #[cfg(feature = "usb")]
1670 {
1671 // Create xhci controller.
1672 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001673 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
Daniel Verkampf1439d42021-05-21 13:55:10 -07001674 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001675
Zide Chen5deee482021-04-19 11:06:01 -07001676 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001677 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1678 BTreeMap::new();
1679
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001680 for vfio_dev in cfg
1681 .vfio
1682 .iter()
1683 .filter(|dev| dev.get_type() == VfioType::Pci)
1684 {
1685 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001686 let (vfio_pci_device, jail) = create_vfio_device(
1687 cfg,
1688 vm,
1689 resources,
1690 control_tubes,
1691 vfio_path.as_path(),
Xiong Zhange19ab752021-05-20 18:18:46 +08001692 false,
Zide Chendfc4b882021-03-10 16:35:37 -08001693 &mut iommu_attached_endpoints,
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001694 vfio_dev.iommu_enabled(),
Zide Chen5deee482021-04-19 11:06:01 -07001695 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001696
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001697 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07001698 }
Zide Chendfc4b882021-03-10 16:35:37 -08001699
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001700 for vfio_dev in cfg
1701 .vfio
1702 .iter()
1703 .filter(|dev| dev.get_type() == VfioType::Platform)
1704 {
1705 let vfio_path = &vfio_dev.vfio_path;
1706 let (vfio_plat_dev, jail) = create_vfio_platform_device(
1707 cfg,
1708 vm,
1709 resources,
1710 control_tubes,
1711 vfio_path.as_path(),
1712 &mut iommu_attached_endpoints,
1713 false, // Virtio IOMMU is not supported yet
1714 )?;
1715
1716 devices.push((Box::new(vfio_plat_dev), jail));
1717 }
1718
Zide Chendfc4b882021-03-10 16:35:37 -08001719 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08001720 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001721
1722 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1723 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07001724 let mut dev =
1725 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
1726 .map_err(Error::VirtioPciDev)?;
1727 // early reservation for viommu.
1728 dev.allocate_address(resources)
1729 .map_err(|_| Error::VirtioPciDev(base::Error::new(EINVAL)))?;
1730 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001731 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08001732 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001733 }
1734
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001735 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08001736}
1737
1738#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001739#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001740struct Ids {
1741 uid: uid_t,
1742 gid: gid_t,
1743}
1744
David Tolnay48c48292019-03-01 16:54:25 -08001745// Set the uid/gid for the jailed process and give a basic id map. This is
1746// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10001747fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
1748 let crosvm_uid = geteuid();
1749 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08001750
David Tolnay48c48292019-03-01 16:54:25 -08001751 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1752 .map_err(Error::SettingUidMap)?;
1753 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1754 .map_err(Error::SettingGidMap)?;
1755
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09001756 if crosvm_uid != 0 {
1757 jail.change_uid(crosvm_uid);
1758 }
1759 if crosvm_gid != 0 {
1760 jail.change_gid(crosvm_gid);
1761 }
Fergus Dall51200512021-08-19 12:54:26 +10001762
David Tolnay41a6f842019-03-01 16:18:44 -08001763 Ok(Ids {
1764 uid: crosvm_uid,
1765 gid: crosvm_gid,
1766 })
David Tolnay48c48292019-03-01 16:54:25 -08001767}
1768
Zach Reizner65b98f12019-11-22 17:34:58 -08001769trait IntoUnixStream {
1770 fn into_unix_stream(self) -> Result<UnixStream>;
1771}
1772
1773impl<'a> IntoUnixStream for &'a Path {
1774 fn into_unix_stream(self) -> Result<UnixStream> {
Andrew Walbranbc55e302021-07-13 17:35:10 +01001775 if let Some(fd) =
1776 safe_descriptor_from_path(self).map_err(|e| Error::InputEventsOpen(e.into()))?
1777 {
1778 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08001779 } else {
1780 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1781 }
1782 }
1783}
1784impl<'a> IntoUnixStream for &'a PathBuf {
1785 fn into_unix_stream(self) -> Result<UnixStream> {
1786 self.as_path().into_unix_stream()
1787 }
1788}
1789
1790impl IntoUnixStream for UnixStream {
1791 fn into_unix_stream(self) -> Result<UnixStream> {
1792 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001793 }
1794}
1795
Steven Richmanf32d0b42020-06-20 21:45:32 -07001796fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1797 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001798 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001799 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001800 // Our signal handler does nothing and is trivially async signal safe.
1801 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1802 .map_err(Error::RegisterSignalHandler)?;
1803 }
1804 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1805 } else {
1806 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001807 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001808 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001809 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001810 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001811 .map_err(Error::RegisterSignalHandler)?;
1812 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001813 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001814 Ok(())
1815}
1816
Steven Richmanf32d0b42020-06-20 21:45:32 -07001817// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001818fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001819 cpu_id: usize,
1820 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001821 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07001822 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001823 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001824 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001825 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001826 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001827 has_bios: bool,
1828 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001829) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001830where
Zach Reizner2c770e62020-09-30 16:49:59 -07001831 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001832{
Zach Reizner304e7312020-09-29 16:00:24 -07001833 let mut vcpu = match vcpu {
1834 Some(v) => v,
1835 None => {
1836 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1837 // the vcpu thread.
1838 match vm
1839 .create_vcpu(cpu_id)
1840 .map_err(Error::CreateVcpu)?
1841 .downcast::<V>()
1842 {
1843 Ok(v) => *v,
1844 Err(_) => panic!("VM created wrong type of VCPU"),
1845 }
1846 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001847 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001848
Steven Richmanf32d0b42020-06-20 21:45:32 -07001849 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001850 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001851 .map_err(Error::AddIrqChipVcpu)?;
1852
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001853 if !vcpu_affinity.is_empty() {
1854 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1855 error!("Failed to set CPU affinity: {}", e);
1856 }
1857 }
1858
Steven Richmanf32d0b42020-06-20 21:45:32 -07001859 Arch::configure_vcpu(
1860 vm.get_memory(),
1861 vm.get_hypervisor(),
1862 irq_chip,
1863 &mut vcpu,
1864 cpu_id,
1865 vcpu_count,
1866 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001867 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001868 )
1869 .map_err(Error::ConfigureVcpu)?;
1870
Zach Reizner026f72f2021-06-01 14:35:29 -07001871 if let Err(e) = enable_core_scheduling() {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001872 error!("Failed to enable core scheduling: {}", e);
1873 }
1874
Kansho Nishidaab205af2020-08-13 18:17:50 +09001875 if run_rt {
1876 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1877 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1878 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1879 {
1880 warn!("Failed to set vcpu to real time: {}", e);
1881 }
1882 }
1883
Steven Richmanf32d0b42020-06-20 21:45:32 -07001884 if use_hypervisor_signals {
1885 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1886 v.retain(|&x| x != SIGRTMIN() + 0);
1887 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1888 }
1889
Zach Reizner2c770e62020-09-30 16:49:59 -07001890 let vcpu_run_handle = vcpu
1891 .take_run_handle(Some(SIGRTMIN() + 0))
1892 .map_err(Error::RunnableVcpu)?;
1893
1894 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001895}
1896
/// Executes one debugger request `d` against `vcpu` and sends the outcome on `reply_tube`.
///
/// Every request produces exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`:
/// register and memory reads reply with the data read, all other requests reply with
/// `VcpuDebugStatus::CommandComplete`. A failed memory read is reported as an empty
/// memory region rather than as an error.
///
/// # Errors
///
/// Returns `Error::HandleDebugCommand` when the architecture-specific operation fails
/// (memory reads excepted, see above); no reply is sent in that case. Returns
/// `Error::SendDebugStatus` when the reply cannot be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // Run the command first; the single send below is shared by all request kinds.
    let msg = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).map_err(Error::HandleDebugCommand)?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            // Best effort: an unreadable region is reported as empty.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage { cpu: cpu_id, msg })
        .map_err(|e| Error::SendDebugStatus(Box::new(e)))
}
1972
Zach Reizner2c770e62020-09-30 16:49:59 -07001973fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001974 cpu_id: usize,
1975 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001976 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07001977 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001978 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001979 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001980 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001981 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001982 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001983 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001984 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001985 io_bus: devices::Bus,
1986 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07001987 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001988 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001989 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001990 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001991 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001992 mpsc::Sender<VcpuDebugStatusMessage>,
1993 >,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001994) -> Result<JoinHandle<()>>
1995where
Zach Reizner2c770e62020-09-30 16:49:59 -07001996 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001997{
Zach Reizner8fb52112017-12-13 16:04:39 -08001998 thread::Builder::new()
1999 .name(format!("crosvm_vcpu{}", cpu_id))
2000 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002001 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2002 // implementation accomplishes that.
2003 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2004
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002005 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2006 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002007 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002008 cpu_id,
2009 vcpu,
2010 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002011 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002012 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002013 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002014 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002015 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002016 has_bios,
2017 use_hypervisor_signals,
2018 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002019
Zach Reizner8fb52112017-12-13 16:04:39 -08002020 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002021
Zach Reizner2c770e62020-09-30 16:49:59 -07002022 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002023 Ok(v) => v,
2024 Err(e) => {
2025 error!("failed to start vcpu {}: {}", cpu_id, e);
2026 return;
2027 }
2028 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002029
Dylan Reidb0492662019-05-17 14:50:13 -07002030 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002031 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002032 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002033 // Wait until a GDB client attaches
2034 run_mode = VmRunMode::Breakpoint;
2035 }
2036
Dylan Reidb0492662019-05-17 14:50:13 -07002037 let mut interrupted_by_signal = false;
2038
2039 'vcpu_loop: loop {
2040 // Start by checking for messages to process and the run state of the CPU.
2041 // An extra check here for Running so there isn't a need to call recv unless a
2042 // message is likely to be ready because a signal was sent.
2043 if interrupted_by_signal || run_mode != VmRunMode::Running {
2044 'state_loop: loop {
2045 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002046 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002047 Ok(m) => m,
2048 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2049 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002050 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002051 break 'state_loop;
2052 }
2053 Err(mpsc::TryRecvError::Empty) => {
2054 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002055 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002056 Ok(m) => m,
2057 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002058 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002059 break 'vcpu_loop;
2060 }
2061 }
2062 }
2063 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002064 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002065 break 'vcpu_loop;
2066 }
2067 };
2068
2069 // Collect all pending messages.
2070 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002071 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002072
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002073 for msg in messages {
2074 match msg {
2075 VcpuControl::RunState(new_mode) => {
2076 run_mode = new_mode;
2077 match run_mode {
2078 VmRunMode::Running => break 'state_loop,
2079 VmRunMode::Suspending => {
2080 // On KVM implementations that use a paravirtualized
2081 // clock (e.g. x86), a flag must be set to indicate to
2082 // the guest kernel that a vCPU was suspended. The guest
2083 // kernel will use this flag to prevent the soft lockup
2084 // detection from triggering when this vCPU resumes,
2085 // which could happen days later in realtime.
2086 if requires_pvclock_ctrl {
2087 if let Err(e) = vcpu.pvclock_ctrl() {
2088 error!(
2089 "failed to tell hypervisor vcpu {} is suspending: {}",
2090 cpu_id, e
2091 );
2092 }
2093 }
2094 }
2095 VmRunMode::Breakpoint => {}
2096 VmRunMode::Exiting => break 'vcpu_loop,
2097 }
2098 }
2099 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2100 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002101 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002102 Some(ref ch) => {
2103 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07002104 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002105 ) {
2106 error!("Failed to handle gdb message: {}", e);
2107 }
2108 },
2109 None => {
2110 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002111 }
2112 }
2113 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002114 VcpuControl::MakeRT => {
2115 if run_rt && delay_rt {
2116 info!("Making vcpu {} RT\n", cpu_id);
2117 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2118 if let Err(e) = set_rt_prio_limit(
2119 u64::from(DEFAULT_VCPU_RT_LEVEL))
2120 .and_then(|_|
2121 set_rt_round_robin(
2122 i32::from(DEFAULT_VCPU_RT_LEVEL)
2123 ))
2124 {
2125 warn!("Failed to set vcpu to real time: {}", e);
2126 }
2127 }
2128 }
Dylan Reidb0492662019-05-17 14:50:13 -07002129 }
2130 }
2131 }
2132 }
2133
2134 interrupted_by_signal = false;
2135
Steven Richman11dc6712020-09-02 15:39:14 -07002136 // Vcpus may have run a HLT instruction, which puts them into a state other than
2137 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2138 // until either the irqchip receives an interrupt for this vcpu, or until the main
2139 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2140 // implementations HLT instructions do not make it to crosvm, and thus this is a
2141 // no-op that always returns VcpuRunState::Runnable.
2142 match irq_chip.wait_until_runnable(&vcpu) {
2143 Ok(VcpuRunState::Runnable) => {}
2144 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2145 Err(e) => error!(
2146 "error waiting for vcpu {} to become runnable: {}",
2147 cpu_id, e
2148 ),
2149 }
2150
2151 if !interrupted_by_signal {
2152 match vcpu.run(&vcpu_run_handle) {
2153 Ok(VcpuExit::IoIn { port, mut size }) => {
2154 let mut data = [0; 8];
2155 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002156 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002157 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002158 }
Steven Richman11dc6712020-09-02 15:39:14 -07002159 io_bus.read(port as u64, &mut data[..size]);
2160 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002161 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002162 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002163 }
Steven Richman11dc6712020-09-02 15:39:14 -07002164 Ok(VcpuExit::IoOut {
2165 port,
2166 mut size,
2167 data,
2168 }) => {
2169 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002170 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002171 size = data.len();
2172 }
2173 io_bus.write(port as u64, &data[..size]);
2174 }
2175 Ok(VcpuExit::MmioRead { address, size }) => {
2176 let mut data = [0; 8];
2177 mmio_bus.read(address, &mut data[..size]);
2178 // Setting data for mmio can not fail.
2179 let _ = vcpu.set_data(&data[..size]);
2180 }
2181 Ok(VcpuExit::MmioWrite {
2182 address,
2183 size,
2184 data,
2185 }) => {
2186 mmio_bus.write(address, &data[..size]);
2187 }
2188 Ok(VcpuExit::IoapicEoi { vector }) => {
2189 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2190 error!(
2191 "failed to broadcast eoi {} on vcpu {}: {}",
2192 vector, cpu_id, e
2193 );
2194 }
2195 }
2196 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002197 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002198 Ok(VcpuExit::Shutdown) => break,
2199 Ok(VcpuExit::FailEntry {
2200 hardware_entry_failure_reason,
2201 }) => {
2202 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002203 break;
2204 }
Steven Richman11dc6712020-09-02 15:39:14 -07002205 Ok(VcpuExit::SystemEvent(_, _)) => break,
2206 Ok(VcpuExit::Debug { .. }) => {
2207 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2208 {
2209 let msg = VcpuDebugStatusMessage {
2210 cpu: cpu_id as usize,
2211 msg: VcpuDebugStatus::HitBreakPoint,
2212 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002213 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002214 if let Err(e) = ch.send(msg) {
2215 error!("failed to notify breakpoint to GDB thread: {}", e);
2216 break;
2217 }
2218 }
2219 run_mode = VmRunMode::Breakpoint;
2220 }
2221 }
2222 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2223 Err(e) => match e.errno() {
2224 libc::EINTR => interrupted_by_signal = true,
2225 libc::EAGAIN => {}
2226 _ => {
2227 error!("vcpu hit unknown error: {}", e);
2228 break;
2229 }
2230 },
2231 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002232 }
2233
2234 if interrupted_by_signal {
2235 if use_hypervisor_signals {
2236 // Try to clear the signal that we use to kick VCPU if it is pending before
2237 // attempting to handle pause requests.
2238 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2239 error!("failed to clear pending signal: {}", e);
2240 break;
2241 }
2242 } else {
2243 vcpu.set_immediate_exit(false);
2244 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002245 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002246
Steven Richman11dc6712020-09-02 15:39:14 -07002247 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2248 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2249 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002250 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002251 })
2252 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002253}
2254
Zach Reiznera90649a2021-03-31 12:56:08 -07002255fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002256 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002257 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002258 open_file(
2259 initrd_path,
2260 true, /*read_only*/
2261 false, /*O_DIRECT*/
2262 )
2263 .map_err(|e| Error::OpenInitrd(initrd_path.to_owned(), e.into()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002264 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002265 } else {
2266 None
2267 };
2268
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002269 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002270 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002271 open_file(
2272 kernel_path,
2273 true, /*read_only*/
2274 false, /*O_DIRECT*/
2275 )
2276 .map_err(|e| Error::OpenKernel(kernel_path.to_owned(), e.into()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002277 ),
2278 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002279 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Andrew Walbranbc55e302021-07-13 17:35:10 +01002280 .map_err(|e| Error::OpenBios(bios_path.to_owned(), e.into()))?,
2281 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002282 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2283 };
2284
Will Deaconc48e7832021-07-30 19:03:06 +01002285 let swiotlb = if let Some(size) = cfg.swiotlb {
2286 Some(
2287 size.checked_mul(1024 * 1024)
2288 .ok_or(Error::SwiotlbTooLarge)?,
2289 )
2290 } else {
2291 match cfg.protected_vm {
2292 ProtectionType::Protected => Some(64 * 1024 * 1024),
2293 ProtectionType::Unprotected => None,
2294 }
2295 };
2296
Zach Reiznera90649a2021-03-31 12:56:08 -07002297 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002298 memory_size: cfg
2299 .memory
2300 .unwrap_or(256)
2301 .checked_mul(1024 * 1024)
2302 .ok_or(Error::MemoryTooLarge)?,
Will Deaconc48e7832021-07-30 19:03:06 +01002303 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002304 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002305 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002306 cpu_clusters: cfg.cpu_clusters.clone(),
2307 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002308 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002309 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002310 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002311 android_fstab: cfg
2312 .android_fstab
2313 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002314 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002315 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002316 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002317 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002318 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002319 acpi_sdts: cfg
2320 .acpi_tables
2321 .iter()
2322 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2323 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002324 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002325 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002326 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002327 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002328 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002329 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002330 no_legacy: cfg.no_legacy,
Zach Reiznera90649a2021-03-31 12:56:08 -07002331 })
2332}
2333
Zach Reiznerdc748482021-04-14 13:59:30 -07002334pub fn run_config(cfg: Config) -> Result<()> {
2335 let components = setup_vm_components(&cfg)?;
2336
2337 let guest_mem_layout =
2338 Arch::guest_memory_layout(&components).map_err(Error::GuestMemoryLayout)?;
2339 let guest_mem = GuestMemory::new(&guest_mem_layout).map_err(Error::CreateGuestMemory)?;
2340 let mut mem_policy = MemoryPolicy::empty();
2341 if components.hugepages {
2342 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2343 }
2344 guest_mem.set_memory_policy(mem_policy);
2345 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).map_err(Error::CreateKvm)?;
2346 let vm = KvmVm::new(&kvm, guest_mem).map_err(Error::CreateVm)?;
2347 let vm_clone = vm.try_clone().map_err(Error::CreateVm)?;
2348
2349 enum KvmIrqChip {
2350 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2351 Split(KvmSplitIrqChip),
2352 Kernel(KvmKernelIrqChip),
2353 }
2354
2355 impl KvmIrqChip {
2356 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2357 match self {
2358 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2359 KvmIrqChip::Split(i) => i,
2360 KvmIrqChip::Kernel(i) => i,
2361 }
2362 }
2363 }
2364
2365 let ioapic_host_tube;
2366 let mut irq_chip = if cfg.split_irqchip {
2367 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2368 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2369 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2370 {
2371 let (host_tube, ioapic_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2372 ioapic_host_tube = Some(host_tube);
2373 KvmIrqChip::Split(
2374 KvmSplitIrqChip::new(
2375 vm_clone,
2376 components.vcpu_count,
2377 ioapic_device_tube,
2378 Some(120),
2379 )
2380 .map_err(Error::CreateIrqChip)?,
2381 )
2382 }
2383 } else {
2384 ioapic_host_tube = None;
2385 KvmIrqChip::Kernel(
2386 KvmKernelIrqChip::new(vm_clone, components.vcpu_count).map_err(Error::CreateIrqChip)?,
2387 )
2388 };
2389
2390 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2391}
2392
2393fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002394 cfg: Config,
2395 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002396 mut vm: V,
2397 irq_chip: &mut dyn IrqChipArch,
2398 ioapic_host_tube: Option<Tube>,
Zach Reiznera90649a2021-03-31 12:56:08 -07002399) -> Result<()>
2400where
2401 Vcpu: VcpuArch + 'static,
2402 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002403{
2404 if cfg.sandbox {
2405 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2406 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2407 // access to those files will not be possible.
2408 info!("crosvm entering multiprocess mode");
2409 }
2410
Daniel Verkampf1439d42021-05-21 13:55:10 -07002411 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002412 let (usb_control_tube, usb_provider) =
2413 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002414
Zach Reiznera90649a2021-03-31 12:56:08 -07002415 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2416 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2417 // quickly.
2418 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002419
Zach Reiznera60744b2019-02-13 17:33:32 -08002420 let control_server_socket = match &cfg.socket_path {
2421 Some(path) => Some(UnlinkUnixSeqpacketListener(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002422 UnixSeqpacketListener::bind(path).map_err(Error::CreateControlServer)?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002423 )),
2424 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002425 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002426
Zach Reiznera90649a2021-03-31 12:56:08 -07002427 let mut control_tubes = Vec::new();
2428
2429 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2430 if let Some(port) = cfg.gdb {
2431 // GDB needs a control socket to interrupt vcpus.
2432 let (gdb_host_tube, gdb_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
2433 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2434 components.gdb = Some((port, gdb_control_tube));
2435 }
2436
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002437 for wl_cfg in &cfg.vhost_user_wl {
2438 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2439 .map(Tube::new)
2440 .map_err(Error::ConnectTube)?;
2441 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2442 }
2443
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002444 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2445 for _ in 0..cfg.vhost_user_gpu.len() {
2446 let (host_tube, device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2447 vhost_user_gpu_tubes.push((
2448 host_tube.try_clone().map_err(Error::CloneTube)?,
2449 device_tube,
2450 ));
2451 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2452 }
2453
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002454 let (wayland_host_tube, wayland_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2455 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002456 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002457 let (balloon_host_tube, balloon_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002458 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2459 balloon_host_tube
2460 .set_recv_timeout(Some(Duration::from_millis(100)))
2461 .map_err(Error::CreateTube)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002462
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002463 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002464 let mut disk_device_tubes = Vec::new();
2465 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002466 let disk_count = cfg.disks.len();
2467 for _ in 0..disk_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002468 let (disk_host_tub, disk_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2469 disk_host_tubes.push(disk_host_tub);
2470 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002471 }
2472
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002473 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002474 let pmem_count = cfg.pmem_devices.len();
2475 for _ in 0..pmem_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002476 let (pmem_host_tube, pmem_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2477 pmem_device_tubes.push(pmem_device_tube);
2478 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002479 }
2480
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002481 let (gpu_host_tube, gpu_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2482 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002483
Zach Reiznerdc748482021-04-14 13:59:30 -07002484 if let Some(ioapic_host_tube) = ioapic_host_tube {
2485 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2486 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002487
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002488 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002489 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002490 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002491 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002492 Some(mut jail) => {
2493 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2494 #[cfg(feature = "power-monitor-powerd")]
2495 {
Fergus Dall51200512021-08-19 12:54:26 +10002496 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09002497
2498 // Create a tmpfs in the device's root directory so that we can bind mount files.
2499 jail.mount_with_data(
2500 Path::new("none"),
2501 Path::new("/"),
2502 "tmpfs",
2503 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2504 "size=67108864",
2505 )?;
2506
2507 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2508 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2509 }
2510 Some(jail)
2511 }
2512 None => None,
2513 };
2514 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002515 } else {
2516 (&cfg.battery_type, None)
2517 };
2518
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002519 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2520
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002521 let fs_count = cfg
2522 .shared_dirs
2523 .iter()
2524 .filter(|sd| sd.kind == SharedDirKind::FS)
2525 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002526 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002527 for _ in 0..fs_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002528 let (fs_host_tube, fs_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2529 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2530 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002531 }
2532
Zach Reiznerdc748482021-04-14 13:59:30 -07002533 let exit_evt = Event::new().map_err(Error::CreateEvent)?;
2534 let mut sys_allocator = Arch::create_system_allocator(vm.get_memory());
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002535
2536 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
2537 let ramoops_region = match &components.pstore {
2538 Some(pstore) => Some(
2539 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, &pstore)
2540 .map_err(Error::Pstore)?,
2541 ),
2542 None => None,
2543 };
2544
Zide Chen71435c12021-03-03 15:02:02 -08002545 let phys_max_addr = Arch::get_phys_max_addr();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002546 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07002547 &cfg,
2548 &mut vm,
2549 &mut sys_allocator,
2550 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08002551 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07002552 &mut control_tubes,
2553 wayland_device_tube,
2554 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002555 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07002556 balloon_device_tube,
2557 &mut disk_device_tubes,
2558 &mut pmem_device_tubes,
2559 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002560 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002561 usb_provider,
2562 Arc::clone(&map_request),
2563 )?;
2564
Peter Fangc2bba082021-04-19 18:40:24 -07002565 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002566 for device in devices
2567 .iter_mut()
2568 .filter_map(|(dev, _)| dev.as_pci_device_mut())
2569 {
Peter Fangc2bba082021-04-19 18:40:24 -07002570 let sdts = device
2571 .generate_acpi(components.acpi_sdts)
2572 .or_else(|| {
2573 error!("ACPI table generation error");
2574 None
2575 })
2576 .ok_or(Error::GenerateAcpi)?;
2577 components.acpi_sdts = sdts;
2578 }
2579
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002580 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002581 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002582 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002583 &exit_evt,
2584 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002585 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002586 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002587 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002588 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002589 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002590 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07002591 irq_chip,
Trent Begin17ccaad2019-04-17 13:51:25 -06002592 )
David Tolnaybe034262019-03-04 17:48:36 -08002593 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002594
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002595 #[cfg(feature = "direct")]
2596 if let Some(pmio) = &cfg.direct_pmio {
2597 let direct_io =
2598 Arc::new(devices::DirectIo::new(&pmio.path, false).map_err(Error::DirectIo)?);
2599 for range in pmio.ranges.iter() {
2600 linux
2601 .io_bus
2602 .insert_sync(direct_io.clone(), range.0, range.1)
2603 .unwrap();
2604 }
2605 };
2606
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002607 #[cfg(feature = "direct")]
2608 let mut irqs = Vec::new();
2609
2610 #[cfg(feature = "direct")]
2611 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002612 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002613 warn!("irq {} already reserved.", irq);
2614 }
2615 let trigger = Event::new().map_err(Error::CreateEvent)?;
2616 let resample = Event::new().map_err(Error::CreateEvent)?;
2617 linux
2618 .irq_chip
2619 .register_irq_event(*irq, &trigger, Some(&resample))
2620 .unwrap();
2621 let direct_irq =
2622 devices::DirectIrq::new(trigger, Some(resample)).map_err(Error::DirectIrq)?;
2623 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2624 irqs.push(direct_irq);
2625 }
2626
2627 #[cfg(feature = "direct")]
2628 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002629 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002630 warn!("irq {} already reserved.", irq);
2631 }
2632 let trigger = Event::new().map_err(Error::CreateEvent)?;
2633 linux
2634 .irq_chip
2635 .register_irq_event(*irq, &trigger, None)
2636 .unwrap();
2637 let direct_irq = devices::DirectIrq::new(trigger, None).map_err(Error::DirectIrq)?;
2638 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2639 irqs.push(direct_irq);
2640 }
2641
Nicholas Verneb57c1242021-07-05 19:11:39 +10002642 let gralloc = RutabagaGralloc::new().map_err(Error::CreateGrallocError)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002643 run_control(
2644 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07002645 sys_allocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002646 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002647 control_tubes,
2648 balloon_host_tube,
2649 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002650 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002651 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002652 exit_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002653 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002654 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002655 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08002656 gralloc,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002657 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002658}
2659
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002660#[allow(dead_code)]
2661fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2662 linux: &mut RunnableLinuxVm<V, Vcpu>,
2663 sys_allocator: &mut SystemAllocator,
2664 cfg: &Config,
2665 control_tubes: &mut Vec<TaggedControlTube>,
2666 vfio_path: &Path,
2667) -> Result<()> {
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002668 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
2669 let (vfio_pci_device, jail) = create_vfio_device(
2670 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002671 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002672 sys_allocator,
2673 control_tubes,
2674 vfio_path,
Xiong Zhange19ab752021-05-20 18:18:46 +08002675 true,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002676 &mut endpoints,
2677 false,
2678 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002679
2680 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
2681 .map_err(Error::ConfigureHotPlugDevice)?;
2682
2683 let host_os_str = vfio_path.file_name().ok_or(Error::InvalidVfioPath)?;
2684 let host_str = host_os_str.to_str().ok_or(Error::InvalidVfioPath)?;
2685 let host_addr = PciAddress::from_string(host_str);
2686 let host_key = HostHotPlugKey::Vfio { host_addr };
2687 if let Some(hp_bus) = &linux.hotplug_bus {
2688 let mut hp_bus = hp_bus.lock();
2689 hp_bus.add_hotplug_device(host_key, pci_address);
2690 hp_bus.hot_plug(pci_address);
2691 return Ok(());
2692 }
2693
2694 Err(Error::NoHotPlugBus)
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002695}
2696
2697#[allow(dead_code)]
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002698fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2699 linux: &RunnableLinuxVm<V, Vcpu>,
2700 vfio_path: &Path,
2701) -> Result<()> {
2702 let host_os_str = vfio_path.file_name().ok_or(Error::InvalidVfioPath)?;
2703 let host_str = host_os_str.to_str().ok_or(Error::InvalidVfioPath)?;
2704 let host_addr = PciAddress::from_string(host_str);
2705 let host_key = HostHotPlugKey::Vfio { host_addr };
2706 if let Some(hp_bus) = &linux.hotplug_bus {
2707 let mut hp_bus = hp_bus.lock();
2708 let pci_addr = hp_bus
2709 .get_hotplug_device(host_key)
2710 .ok_or(Error::InvalidHotPlugKey)?;
2711 hp_bus.hot_unplug(pci_addr);
2712 return Ok(());
2713 }
2714
2715 Err(Error::NoHotPlugBus)
2716}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002717
Daniel Verkamp29409802021-02-24 14:46:19 -08002718/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
2719/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07002720/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2721/// loop.
2722fn kick_all_vcpus(
2723 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07002724 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08002725 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07002726) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002727 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08002728 if let Err(e) = tube.send(message.clone()) {
2729 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07002730 }
2731 let _ = handle.kill(SIGRTMIN() + 0);
2732 }
2733 irq_chip.kick_halted_vcpus();
2734}
2735
Zach Reiznerdc748482021-04-14 13:59:30 -07002736fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
2737 mut linux: RunnableLinuxVm<V, Vcpu>,
2738 mut sys_allocator: SystemAllocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002739 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002740 mut control_tubes: Vec<TaggedControlTube>,
2741 balloon_host_tube: Tube,
2742 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07002743 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002744 exit_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07002745 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002746 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002747 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002748 mut gralloc: RutabagaGralloc,
Zach Reizner55a9e502018-10-03 10:22:32 -07002749) -> Result<()> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002750 #[derive(PollToken)]
2751 enum Token {
2752 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002753 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002754 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002755 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08002756 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002757 VmControl { index: usize },
2758 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002759
Zach Reizner19ad1f32019-12-12 18:58:50 -08002760 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002761 .set_raw_mode()
2762 .expect("failed to set terminal raw mode");
2763
Michael Hoylee392c462020-10-07 03:29:24 -07002764 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07002765 (&exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002766 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002767 (&sigchld_fd, Token::ChildSignal),
2768 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002769 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002770
Zach Reiznera60744b2019-02-13 17:33:32 -08002771 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002772 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002773 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002774 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002775 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002776 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002777 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002778 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002779 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002780 }
2781
Steven Richmanf32d0b42020-06-20 21:45:32 -07002782 let events = linux
2783 .irq_chip
2784 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002785 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002786
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002787 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002788 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002789 .add(&evt, Token::IrqFd { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002790 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002791 }
2792
Lepton Wu20333e42019-03-14 10:48:03 -07002793 if sandbox {
2794 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2795 drop_capabilities().map_err(Error::DropCapabilities)?;
2796 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002797
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002798 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2799 // Create a channel for GDB thread.
2800 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2801 let (s, r) = mpsc::channel();
2802 (Some(s), Some(r))
2803 } else {
2804 (None, None)
2805 };
2806
Steven Richmanf32d0b42020-06-20 21:45:32 -07002807 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2808 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002809 let use_hypervisor_signals = !linux
2810 .vm
2811 .get_hypervisor()
2812 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002813 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002814
Zach Reizner304e7312020-09-29 16:00:24 -07002815 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002816 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002817 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2818 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002819 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07002820 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07002821 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2822 Some(VcpuAffinity::Global(v)) => v,
2823 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2824 None => Default::default(),
2825 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002826 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002827 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002828 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002829 linux.vm.try_clone().map_err(Error::CloneEvent)?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002830 linux.irq_chip.try_box_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002831 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002832 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002833 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002834 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002835 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002836 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002837 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002838 linux.io_bus.clone(),
2839 linux.mmio_bus.clone(),
Zach Reiznerdc748482021-04-14 13:59:30 -07002840 exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002841 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07002842 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002843 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002844 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2845 to_gdb_channel.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07002846 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07002847 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07002848 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002849
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002850 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2851 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002852 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002853 let to_vcpu_channels = vcpu_handles
2854 .iter()
2855 .map(|(_handle, channel)| channel.clone())
2856 .collect();
2857 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002858 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002859 to_vcpu_channels,
2860 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
2861 );
2862 thread::Builder::new()
2863 .name("gdb".to_owned())
2864 .spawn(move || gdb_thread(target, gdb_port_num))
2865 .map_err(Error::SpawnGdbServer)?;
2866 };
2867
Dylan Reid059a1882018-07-23 17:58:09 -07002868 vcpu_thread_barrier.wait();
2869
Charles William Dick54045012021-07-27 19:11:53 +09002870 let mut balloon_stats_id: u64 = 0;
2871
Michael Hoylee392c462020-10-07 03:29:24 -07002872 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002873 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07002874 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002875 Ok(v) => v,
2876 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002877 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002878 break;
2879 }
2880 }
2881 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002882
Steven Richmanf32d0b42020-06-20 21:45:32 -07002883 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2884 warn!("can't deliver delayed irqs: {}", e);
2885 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002886
Zach Reiznera60744b2019-02-13 17:33:32 -08002887 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07002888 for event in events.iter().filter(|e| e.is_readable) {
2889 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002890 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002891 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07002892 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002893 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002894 Token::Suspend => {
2895 info!("VM requested suspend");
2896 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07002897 kick_all_vcpus(
2898 &vcpu_handles,
2899 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002900 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07002901 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002902 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002903 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002904 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002905 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002906 let pid = siginfo.ssi_pid;
2907 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2908 Some(label) => format!("{} (pid {})", label, pid),
2909 None => format!("pid {}", pid),
2910 };
David Tolnayf5032762018-12-03 10:46:45 -08002911 error!(
2912 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002913 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002914 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002915 }
Michael Hoylee392c462020-10-07 03:29:24 -07002916 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002917 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002918 Token::IrqFd { index } => {
2919 if let Err(e) = linux.irq_chip.service_irq_event(index) {
2920 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002921 }
2922 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002923 Token::VmControlServer => {
2924 if let Some(socket_server) = &control_server_socket {
2925 match socket_server.accept() {
2926 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07002927 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002928 .add(
2929 &socket,
2930 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002931 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08002932 },
2933 )
Michael Hoylee392c462020-10-07 03:29:24 -07002934 .map_err(Error::WaitContextAdd)?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002935 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002936 }
2937 Err(e) => error!("failed to accept socket: {}", e),
2938 }
2939 }
2940 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002941 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002942 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002943 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002944 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002945 Ok(request) => {
2946 let mut run_mode_opt = None;
2947 let response = request.execute(
2948 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002949 &balloon_host_tube,
Charles William Dick54045012021-07-27 19:11:53 +09002950 &mut balloon_stats_id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002951 disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002952 #[cfg(feature = "usb")]
2953 Some(&usb_control_tube),
2954 #[cfg(not(feature = "usb"))]
2955 None,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08002956 &mut linux.bat_control,
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002957 &vcpu_handles,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002958 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002959 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002960 error!("failed to send VmResponse: {}", e);
2961 }
2962 if let Some(run_mode) = run_mode_opt {
2963 info!("control socket changed run mode to {}", run_mode);
2964 match run_mode {
2965 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07002966 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002967 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002968 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08002969 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07002970 for dev in &linux.resume_notify_devices {
2971 dev.lock().resume_imminent();
2972 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002973 }
Steven Richman11dc6712020-09-02 15:39:14 -07002974 kick_all_vcpus(
2975 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07002976 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002977 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07002978 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002979 }
2980 }
2981 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002982 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002983 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002984 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002985 vm_control_indices_to_remove.push(index);
2986 } else {
2987 error!("failed to recv VmRequest: {}", e);
2988 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002989 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002990 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002991 TaggedControlTube::VmMemory(tube) => {
2992 match tube.recv::<VmMemoryRequest>() {
2993 Ok(request) => {
2994 let response = request.execute(
2995 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002996 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002997 Arc::clone(&map_request),
2998 &mut gralloc,
2999 );
3000 if let Err(e) = tube.send(&response) {
3001 error!("failed to send VmMemoryControlResponse: {}", e);
3002 }
3003 }
3004 Err(e) => {
3005 if let TubeError::Disconnected = e {
3006 vm_control_indices_to_remove.push(index);
3007 } else {
3008 error!("failed to recv VmMemoryControlRequest: {}", e);
3009 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003010 }
3011 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003012 }
3013 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003014 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003015 let response = {
3016 let irq_chip = &mut linux.irq_chip;
3017 request.execute(
3018 |setup| match setup {
3019 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003020 if let Some(event_index) = irq_chip
3021 .register_irq_event(irq, ev, None)?
3022 {
3023 match wait_ctx.add(
3024 ev,
3025 Token::IrqFd {
3026 index: event_index
3027 },
3028 ) {
3029 Err(e) => {
3030 warn!("failed to add IrqFd to poll context: {}", e);
3031 Err(e)
3032 },
3033 Ok(_) => {
3034 Ok(())
3035 }
3036 }
3037 } else {
3038 Ok(())
3039 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003040 }
3041 IrqSetup::Route(route) => irq_chip.route_irq(route),
3042 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003043 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003044 )
3045 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003046 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003047 error!("failed to send VmIrqResponse: {}", e);
3048 }
3049 }
3050 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003051 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003052 vm_control_indices_to_remove.push(index);
3053 } else {
3054 error!("failed to recv VmIrqRequest: {}", e);
3055 }
3056 }
3057 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003058 TaggedControlTube::VmMsync(tube) => {
3059 match tube.recv::<VmMsyncRequest>() {
3060 Ok(request) => {
3061 let response = request.execute(&mut linux.vm);
3062 if let Err(e) = tube.send(&response) {
3063 error!("failed to send VmMsyncResponse: {}", e);
3064 }
3065 }
3066 Err(e) => {
3067 if let TubeError::Disconnected = e {
3068 vm_control_indices_to_remove.push(index);
3069 } else {
3070 error!("failed to recv VmMsyncRequest: {}", e);
3071 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003072 }
3073 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003074 }
3075 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003076 Ok(request) => {
3077 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003078 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003079 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003080 error!("failed to send VmResponse: {}", e);
3081 }
3082 }
3083 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003084 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003085 vm_control_indices_to_remove.push(index);
3086 } else {
3087 error!("failed to recv VmResponse: {}", e);
3088 }
3089 }
3090 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003091 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003092 }
3093 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003094 }
3095 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003096
Vikram Auradkarede68c72021-07-01 14:33:54 -07003097 // It's possible more data is readable and buffered while the socket is hungup,
3098 // so don't delete the tube from the poll context until we're sure all the
3099 // data is read.
3100 // Below case covers a condition where we have received a hungup event and the tube is not
3101 // readable.
3102 // In case of readable tube, once all data is read, any attempt to read more data on hungup
3103 // tube should fail. On such failure, we get Disconnected error and index gets added to
3104 // vm_control_indices_to_remove by the time we reach here.
3105 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3106 if let Token::VmControl { index } = event.token {
3107 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003108 }
3109 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003110
3111 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003112 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003113 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003114 vm_control_indices_to_remove.dedup();
3115 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003116 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3117 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003118 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003119 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003120 // that has already been closed. Because the token associated with that spurious event
3121 // now belongs to a different socket, the control loop will start to interact with
3122 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3123 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003124 if let Some(socket) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003125 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08003126 }
3127
3128 // This line implicitly drops the socket at `index` when it gets returned by
3129 // `swap_remove`. After this line, the socket at `index` is not the one from
3130 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003131 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003132 control_tubes.swap_remove(index);
3133 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003134 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003135 .modify(tube, EventType::Read, Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07003136 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003137 }
3138 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003139 }
3140
Zach Reiznerdc748482021-04-14 13:59:30 -07003141 kick_all_vcpus(
3142 &vcpu_handles,
3143 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003144 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003145 );
Steven Richman11dc6712020-09-02 15:39:14 -07003146 for (handle, _) in vcpu_handles {
3147 if let Err(e) = handle.join() {
3148 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003149 }
3150 }
3151
Daniel Verkamp94c35272019-09-12 13:31:30 -07003152 // Explicitly drop the VM structure here to allow the devices to clean up before the
3153 // control sockets are closed when this function exits.
3154 mem::drop(linux);
3155
Zach Reizner19ad1f32019-12-12 18:58:50 -08003156 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003157 .set_canon_mode()
3158 .expect("failed to restore canonical mode for terminal");
3159
3160 Ok(())
3161}