blob: c685af383bfc8d3ad85bfe970b8b9358775d2426 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Hikaru Nishida584e52c2021-04-27 17:37:08 +09005use std::cmp::Reverse;
Zide Chendfc4b882021-03-10 16:35:37 -08006use std::collections::BTreeMap;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
Dylan Reid059a1882018-07-23 17:58:09 -070011use std::fs::{File, OpenOptions};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090012use std::io::{self, stdin};
Steven Richmanf32d0b42020-06-20 21:45:32 -070013use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070014use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080015use std::net::Ipv4Addr;
Zach Reiznera60744b2019-02-13 17:33:32 -080016use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080017use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090018use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070019use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070020use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090021use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070022
Zach Reizner39aa26b2017-12-12 18:03:23 -080023use std::thread;
24use std::thread::JoinHandle;
25
Peter Fangad3b24e2021-06-21 00:43:29 -070026use libc::{self, c_int, gid_t, uid_t, EINVAL};
Zach Reizner39aa26b2017-12-12 18:03:23 -080027
Tomasz Jeznach42644642020-05-20 23:27:59 -070028use acpi_tables::sdt::SDT;
29
Richard5afeafa2021-07-26 19:02:09 -070030use crate::error::{Error, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090031use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080032use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090033use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070034use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Jason Macnakd659a0d2021-03-15 15:33:01 -070035#[cfg(feature = "gpu")]
36use devices::virtio::gpu::{DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH};
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090037use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070038 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
39 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090040};
Zach Reizner65b98f12019-11-22 17:34:58 -080041#[cfg(feature = "gpu")]
42use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070043use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080044#[cfg(feature = "audio")]
45use devices::Ac97Dev;
Will Deaconc48e7832021-07-30 19:03:06 +010046use devices::ProtectionType;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080047use devices::{
Xiong Zhang262e6182021-05-18 14:58:07 +080048 self, HostHotPlugKey, IrqChip, IrqEventIndex, KvmKernelIrqChip, PciAddress, PciBridge,
49 PciDevice, PcieRootPort, VcpuRunState, VfioContainer, VfioDevice, VfioPciDevice,
50 VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080051};
Daniel Verkampf1439d42021-05-21 13:55:10 -070052#[cfg(feature = "usb")]
53use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070054use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Xiong Zhangdea7dbb2021-07-26 14:49:03 +080055use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070056use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070057use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080058use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080059use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070060use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080061use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090062use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080063
Keiichi Watanabec5262e92020-10-21 15:57:33 +090064#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
65use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090066use crate::{
Woody Chow5890b702021-02-12 14:57:02 +090067 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VhostUserFsOption,
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090068 VhostUserOption, VhostUserWlOption,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090069};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070070use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090071 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070072};
Sonny Raoed517d12018-02-13 22:09:43 -080073
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080074#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070075use {
76 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070077 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070078 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
79};
Zach Reizner55a9e502018-10-03 10:22:32 -070080#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070081use {
Steven Richman11dc6712020-09-02 15:39:14 -070082 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
83 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070084 x86_64::X8664arch as Arch,
85};
Zach Reizner39aa26b2017-12-12 18:03:23 -080086
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080087enum TaggedControlTube {
88 Fs(Tube),
89 Vm(Tube),
90 VmMemory(Tube),
91 VmIrq(Tube),
92 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -070093}
94
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080095impl AsRef<Tube> for TaggedControlTube {
96 fn as_ref(&self) -> &Tube {
97 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -070098 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080099 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700100 }
101 }
102}
103
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800104impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700105 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800106 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700107 }
108}
109
Andrew Walbranf50bab62020-07-07 13:22:53 +0100110fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900111 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900112
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900113 // Safe because this will only modify `buf` and we check the return value.
114 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
115 if res == 0 {
116 // Safe because the kernel guarantees that the struct is fully initialized.
117 let limit = unsafe { buf.assume_init() };
118 Ok(limit.rlim_max)
119 } else {
120 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
121 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900122}
123
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When set, the jail is created with an empty capability mask (`use_caps(0)`).
    limit_caps: bool,
    /// When set, seccomp filter failures are logged and the `.policy` file is always used.
    log_failures: bool,
    /// Seccomp policy path; its extension is replaced with `bpf` or `policy` before loading.
    seccomp_policy: &'a Path,
    /// Optional uid map string handed to `Minijail::uidmap`.
    uid_map: Option<&'a str>,
    /// Optional gid map string handed to `Minijail::gidmap`.
    gid_map: Option<&'a str>,
}
131
Zach Reizner44863792019-06-26 14:22:08 -0700132fn create_base_minijail(
133 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800134 r_limit: Option<u64>,
135 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700136) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800137 // All child jails run in a new user namespace without any users mapped,
138 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800139 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800140
141 if let Some(config) = config {
142 j.namespace_pids();
143 j.namespace_user();
144 j.namespace_user_disable_setgroups();
145 if config.limit_caps {
146 // Don't need any capabilities.
147 j.use_caps(0);
148 }
149 if let Some(uid_map) = config.uid_map {
150 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
151 }
152 if let Some(gid_map) = config.gid_map {
153 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
154 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900155 // Run in a new mount namespace.
156 j.namespace_vfs();
157
Matt Delcoc24ad782020-02-14 13:24:36 -0800158 // Run in an empty network namespace.
159 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900160
161 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800162 j.no_new_privs();
163
164 // By default we'll prioritize using the pre-compiled .bpf over the .policy
165 // file (the .bpf is expected to be compiled using "trap" as the failure
166 // behavior instead of the default "kill" behavior).
167 // Refer to the code comment for the "seccomp-log-failures"
168 // command-line parameter for an explanation about why the |log_failures|
169 // flag forces the use of .policy files (and the build-time alternative to
170 // this run-time flag).
171 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
172 if bpf_policy_file.exists() && !config.log_failures {
173 j.parse_seccomp_program(&bpf_policy_file)
174 .map_err(Error::DeviceJail)?;
175 } else {
176 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
177 // which will correctly kill the entire device process if a worker
178 // thread commits a seccomp violation.
179 j.set_seccomp_filter_tsync();
180 if config.log_failures {
181 j.log_seccomp_filter_failures();
182 }
183 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
184 .map_err(Error::DeviceJail)?;
185 }
186 j.use_seccomp_filter();
187 // Don't do init setup.
188 j.run_as_init();
189 }
190
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900191 // Only pivot_root if we are not re-using the current root directory.
192 if root != Path::new("/") {
193 // It's safe to call `namespace_vfs` multiple times.
194 j.namespace_vfs();
195 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
196 }
Matt Delco45caf912019-11-13 08:11:09 -0800197
Matt Delcoc24ad782020-02-14 13:24:36 -0800198 // Most devices don't need to open many fds.
199 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
200 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
201 .map_err(Error::SettingMaxOpenFiles)?;
202
Zach Reizner39aa26b2017-12-12 18:03:23 -0800203 Ok(j)
204}
205
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800206fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700207 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800208 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
209 // A directory for a jailed device's pivot root.
210 let root_path = Path::new(pivot_root);
211 if !root_path.exists() {
212 return Err(Error::PivotRootDoesntExist(pivot_root));
213 }
214 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800215 let config = SandboxConfig {
216 limit_caps: true,
217 log_failures: cfg.seccomp_log_failures,
218 seccomp_policy: &policy_path,
219 uid_map: None,
220 gid_map: None,
221 };
222 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800223 } else {
224 Ok(None)
225 }
226}
227
David Tolnayfd0971d2019-03-04 17:15:57 -0800228type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800229
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000230fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900231 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Andrew Walbranbc55e302021-07-13 17:35:10 +0100232 .map_err(|e| Error::Disk(disk.path.clone(), e.into()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800233 // Lock the disk image to prevent other crosvm instances from using it.
234 let lock_op = if disk.read_only {
235 FlockOperation::LockShared
236 } else {
237 FlockOperation::LockExclusive
238 };
239 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
240
Dylan Reid503c5ab2020-07-17 11:20:07 -0700241 let dev = if disk::async_ok(&raw_image).map_err(Error::CreateDiskError)? {
242 let async_file = disk::create_async_disk_file(raw_image).map_err(Error::CreateDiskError)?;
243 Box::new(
244 virtio::BlockAsync::new(
245 virtio::base_features(cfg.protected_vm),
246 async_file,
247 disk.read_only,
248 disk.sparse,
249 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700250 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800251 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700252 )
253 .map_err(Error::BlockDeviceNew)?,
254 ) as Box<dyn VirtioDevice>
255 } else {
256 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
257 Box::new(
258 virtio::Block::new(
259 virtio::base_features(cfg.protected_vm),
260 disk_file,
261 disk.read_only,
262 disk.sparse,
263 disk.block_size,
264 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800265 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700266 )
267 .map_err(Error::BlockDeviceNew)?,
268 ) as Box<dyn VirtioDevice>
269 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800270
271 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700272 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700273 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800274 })
275}
276
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900277fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
278 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
279 .map_err(Error::VhostUserBlockDeviceNew)?;
280
281 Ok(VirtioDeviceStub {
282 dev: Box::new(dev),
283 // no sandbox here because virtqueue handling is exported to a different process.
284 jail: None,
285 })
286}
287
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900288fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
289 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
290 .map_err(Error::VhostUserConsoleDeviceNew)?;
291
292 Ok(VirtioDeviceStub {
293 dev: Box::new(dev),
294 // no sandbox here because virtqueue handling is exported to a different process.
295 jail: None,
296 })
297}
298
Woody Chow5890b702021-02-12 14:57:02 +0900299fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
300 let dev = VhostUserFs::new(
301 virtio::base_features(cfg.protected_vm),
302 &option.socket,
303 &option.tag,
304 )
305 .map_err(Error::VhostUserFsDeviceNew)?;
306
307 Ok(VirtioDeviceStub {
308 dev: Box::new(dev),
309 // no sandbox here because virtqueue handling is exported to a different process.
310 jail: None,
311 })
312}
313
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900314fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
315 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
316 .map_err(Error::VhostUserMac80211HwsimNew)?;
317
318 Ok(VirtioDeviceStub {
319 dev: Box::new(dev),
320 // no sandbox here because virtqueue handling is exported to a different process.
321 jail: None,
322 })
323}
324
David Tolnay2b089fc2019-03-04 15:33:22 -0800325fn create_rng_device(cfg: &Config) -> DeviceResult {
Keiichi Watanabef70350b2020-11-24 21:57:53 +0900326 let dev =
327 virtio::Rng::new(virtio::base_features(cfg.protected_vm)).map_err(Error::RngDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800328
329 Ok(VirtioDeviceStub {
330 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700331 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800332 })
333}
334
/// Creates a virtio TPM device together with its storage directory.
///
/// With sandboxing enabled, the jail gets a 20 KB tmpfs root, the crosvm user is added to it, and
/// a per-process directory `/run/vm/tpm.<pid>` is created, chowned to that user and bind-mounted
/// into the jail. Without a jail, `/tmp/tpm-simulator` is used as storage.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = &mut tpm_jail {
        // Create a tmpfs in the device's root directory for tpm
        // simulator storage. The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

        let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
        let storage = Path::new(&tpm_pid_dir).to_owned();
        fs::create_dir_all(&storage)
            .map_err(|e| Error::CreateTpmStorage(storage.to_owned(), e))?;
        let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
        chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid).map_err(Error::ChownTpmStorage)?;

        jail.mount_bind(&storage, &storage, true)?;
        storage
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        Path::new("/tmp/tpm-simulator").to_owned()
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
382
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700383fn create_single_touch_device(
384 cfg: &Config,
385 single_touch_spec: &TouchDeviceOption,
386 idx: u32,
387) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800388 let socket = single_touch_spec
389 .get_path()
390 .into_unix_stream()
391 .map_err(|e| {
392 error!("failed configuring virtio single touch: {:?}", e);
393 e
394 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800395
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800396 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700397 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700398 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700399 socket,
400 width,
401 height,
402 virtio::base_features(cfg.protected_vm),
403 )
404 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800405 Ok(VirtioDeviceStub {
406 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700407 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800408 })
409}
410
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700411fn create_multi_touch_device(
412 cfg: &Config,
413 multi_touch_spec: &TouchDeviceOption,
414 idx: u32,
415) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000416 let socket = multi_touch_spec
417 .get_path()
418 .into_unix_stream()
419 .map_err(|e| {
420 error!("failed configuring virtio multi touch: {:?}", e);
421 e
422 })?;
423
424 let (width, height) = multi_touch_spec.get_size();
425 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700426 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000427 socket,
428 width,
429 height,
430 virtio::base_features(cfg.protected_vm),
431 )
432 .map_err(Error::InputDeviceNew)?;
433
434 Ok(VirtioDeviceStub {
435 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700436 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000437 })
438}
439
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700440fn create_trackpad_device(
441 cfg: &Config,
442 trackpad_spec: &TouchDeviceOption,
443 idx: u32,
444) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800445 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800446 error!("failed configuring virtio trackpad: {}", e);
447 e
448 })?;
449
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800450 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700451 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700452 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700453 socket,
454 width,
455 height,
456 virtio::base_features(cfg.protected_vm),
457 )
458 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800459
460 Ok(VirtioDeviceStub {
461 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700462 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800463 })
464}
465
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700466fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800467 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800468 error!("failed configuring virtio mouse: {}", e);
469 e
470 })?;
471
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700472 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Noah Goldd4ca29b2020-10-27 12:21:52 -0700473 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800474
475 Ok(VirtioDeviceStub {
476 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700477 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800478 })
479}
480
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700481fn create_keyboard_device<T: IntoUnixStream>(
482 cfg: &Config,
483 keyboard_socket: T,
484 idx: u32,
485) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800486 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800487 error!("failed configuring virtio keyboard: {}", e);
488 e
489 })?;
490
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700491 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Noah Goldd4ca29b2020-10-27 12:21:52 -0700492 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800493
494 Ok(VirtioDeviceStub {
495 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700496 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800497 })
498}
499
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700500fn create_switches_device<T: IntoUnixStream>(
501 cfg: &Config,
502 switches_socket: T,
503 idx: u32,
504) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800505 let socket = switches_socket.into_unix_stream().map_err(|e| {
506 error!("failed configuring virtio switches: {}", e);
507 e
508 })?;
509
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700510 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Norman5e23df72021-03-11 10:11:02 -0800511 .map_err(Error::InputDeviceNew)?;
512
513 Ok(VirtioDeviceStub {
514 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700515 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800516 })
517}
518
David Tolnay2b089fc2019-03-04 15:33:22 -0800519fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
520 let dev_file = OpenOptions::new()
521 .read(true)
522 .write(true)
523 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800524 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800525
Noah Goldd4ca29b2020-10-27 12:21:52 -0700526 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
527 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800528
529 Ok(VirtioDeviceStub {
530 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700531 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800532 })
533}
534
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800535fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
536 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100537 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800538
539 Ok(VirtioDeviceStub {
540 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700541 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800542 })
543}
544
Michael Hoylea596a072020-11-10 19:32:45 -0800545fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800546 // Safe because we ensure that we get a unique handle to the fd.
547 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800548 Tap::from_raw_descriptor(
549 validate_raw_descriptor(tap_fd).map_err(Error::ValidateRawDescriptor)?,
550 )
551 .map_err(Error::CreateTapDevice)?
David Tolnay2b089fc2019-03-04 15:33:22 -0800552 };
553
Xiong Zhang773c7072020-03-20 10:39:55 +0800554 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
555 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700556 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800557 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
558 vq_pairs = 1;
559 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100560 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100561 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800562
563 Ok(VirtioDeviceStub {
564 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700565 jail: simple_jail(cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800566 })
567}
568
569fn create_net_device(
570 cfg: &Config,
571 host_ip: Ipv4Addr,
572 netmask: Ipv4Addr,
573 mac_address: MacAddress,
574 mem: &GuestMemory,
575) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800576 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
577 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700578 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800579 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
580 vq_pairs = 1;
581 }
582
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100583 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800584 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100585 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100586 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100587 features,
588 host_ip,
589 netmask,
590 mac_address,
591 mem,
592 )
593 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800594 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800595 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100596 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800597 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800598 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800599 };
600
601 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800602 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800603 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800604 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800605 };
606
607 Ok(VirtioDeviceStub {
608 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700609 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800610 })
611}
612
Keiichi Watanabe60686582021-03-12 04:53:51 +0900613fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
614 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
615 .map_err(Error::VhostUserNetDeviceNew)?;
616
617 Ok(VirtioDeviceStub {
618 dev: Box::new(dev),
619 // no sandbox here because virtqueue handling is exported to a different process.
620 jail: None,
621 })
622}
623
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900624fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
625 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
626 // connection.
627 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
628 .map_err(Error::VhostUserWlDeviceNew)?;
629
630 Ok(VirtioDeviceStub {
631 dev: Box::new(dev),
632 // no sandbox here because virtqueue handling is exported to a different process.
633 jail: None,
634 })
635}
636
David Tolnay2b089fc2019-03-04 15:33:22 -0800637#[cfg(feature = "gpu")]
638fn create_gpu_device(
639 cfg: &Config,
Michael Hoyle685316f2020-09-16 15:29:20 -0700640 exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800641 gpu_device_tube: Tube,
642 resource_bridges: Vec<Tube>,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900643 wayland_socket_path: Option<&PathBuf>,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700644 x_display: Option<String>,
Zach Reizner65b98f12019-11-22 17:34:58 -0800645 event_devices: Vec<EventDevice>,
Lingfeng Yang5572c8d2020-05-05 08:40:36 -0700646 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800647) -> DeviceResult {
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700648 let mut display_backends = vec![
649 virtio::DisplayBackend::X(x_display),
Jason Macnak60eb1fb2020-01-09 14:36:29 -0800650 virtio::DisplayBackend::Stub,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700651 ];
652
Gurchetan Singh1bbbf1c2021-05-19 15:05:56 -0700653 let wayland_socket_dirs = cfg
654 .wayland_socket_paths
655 .iter()
656 .map(|(_name, path)| path.parent())
657 .collect::<Option<Vec<_>>>()
658 .ok_or(Error::InvalidWaylandPath)?;
659
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900660 if let Some(socket_path) = wayland_socket_path {
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700661 display_backends.insert(
662 0,
Gurchetan Singh1bbbf1c2021-05-19 15:05:56 -0700663 virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700664 );
665 }
666
David Tolnay2b089fc2019-03-04 15:33:22 -0800667 let dev = virtio::Gpu::new(
Michael Hoyle685316f2020-09-16 15:29:20 -0700668 exit_evt.try_clone().map_err(Error::CloneEvent)?,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800669 Some(gpu_device_tube),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800670 resource_bridges,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700671 display_backends,
Jason Macnakcc7070b2019-11-06 14:48:12 -0800672 cfg.gpu_parameters.as_ref().unwrap(),
Zach Reizner65b98f12019-11-22 17:34:58 -0800673 event_devices,
Lingfeng Yang5572c8d2020-05-05 08:40:36 -0700674 map_request,
675 cfg.sandbox,
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100676 virtio::base_features(cfg.protected_vm),
Gurchetan Singh781d9752021-02-15 17:45:22 -0800677 cfg.wayland_socket_paths.clone(),
David Tolnay2b089fc2019-03-04 15:33:22 -0800678 );
679
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700680 let jail = match simple_jail(cfg, "gpu_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800681 Some(mut jail) => {
682 // Create a tmpfs in the device's root directory so that we can bind mount the
683 // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
684 jail.mount_with_data(
685 Path::new("none"),
686 Path::new("/"),
687 "tmpfs",
688 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
689 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800690 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800691
692 // Device nodes required for DRM.
693 let sys_dev_char_path = Path::new("/sys/dev/char");
David Tolnayfd0971d2019-03-04 17:15:57 -0800694 jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800695 let sys_devices_path = Path::new("/sys/devices");
David Tolnayfd0971d2019-03-04 17:15:57 -0800696 jail.mount_bind(sys_devices_path, sys_devices_path, false)?;
Jason Macnak23400522020-08-28 09:10:46 -0700697
David Tolnay2b089fc2019-03-04 15:33:22 -0800698 let drm_dri_path = Path::new("/dev/dri");
Jason Macnak23400522020-08-28 09:10:46 -0700699 if drm_dri_path.exists() {
700 jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
701 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800702
John Batesb220eac2020-09-14 17:03:02 -0700703 // Prepare GPU shader disk cache directory.
704 if let Some(cache_dir) = cfg
705 .gpu_parameters
706 .as_ref()
707 .and_then(|params| params.cache_path.as_ref())
708 {
709 if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
710 warn!("shader caching not yet supported on ARM with sandbox enabled");
711 env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
712 } else {
John Bates04059732020-10-01 15:58:55 -0700713 env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
John Batesb220eac2020-09-14 17:03:02 -0700714 env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
715 if let Some(cache_size) = cfg
716 .gpu_parameters
717 .as_ref()
718 .and_then(|params| params.cache_size.as_ref())
719 {
720 env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
721 }
722 let shadercache_path = Path::new(cache_dir);
723 jail.mount_bind(shadercache_path, shadercache_path, true)?;
724 }
725 }
726
David Riley06787c52019-07-24 12:09:07 -0700727 // If the ARM specific devices exist on the host, bind mount them in.
728 let mali0_path = Path::new("/dev/mali0");
729 if mali0_path.exists() {
730 jail.mount_bind(mali0_path, mali0_path, true)?;
731 }
732
733 let pvr_sync_path = Path::new("/dev/pvr_sync");
734 if pvr_sync_path.exists() {
735 jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
736 }
737
Gurchetan Singhb66d6f62019-11-08 10:41:29 -0800738 // If the udmabuf driver exists on the host, bind mount it in.
739 let udmabuf_path = Path::new("/dev/udmabuf");
740 if udmabuf_path.exists() {
741 jail.mount_bind(udmabuf_path, udmabuf_path, true)?;
742 }
743
David Tolnay2b089fc2019-03-04 15:33:22 -0800744 // Libraries that are required when mesa drivers are dynamically loaded.
Chia-I Wud562b1a2020-12-27 21:08:27 -0800745 let lib_dirs = &[
746 "/usr/lib",
747 "/usr/lib64",
748 "/lib",
749 "/lib64",
John Batesef085de2021-03-15 08:55:54 -0700750 "/usr/share/glvnd",
Chia-I Wud562b1a2020-12-27 21:08:27 -0800751 "/usr/share/vulkan",
752 ];
David Riley06787c52019-07-24 12:09:07 -0700753 for dir in lib_dirs {
754 let dir_path = Path::new(dir);
755 if dir_path.exists() {
756 jail.mount_bind(dir_path, dir_path, false)?;
757 }
758 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800759
Gurchetan Singh1bbbf1c2021-05-19 15:05:56 -0700760 // Bind mount the wayland socket's directory into jail's root. This is necessary since
761 // each new wayland context must open() the socket. If the wayland socket is ever
762 // destroyed and remade in the same host directory, new connections will be possible
763 // without restarting the wayland device.
764 for dir in &wayland_socket_dirs {
765 jail.mount_bind(dir, dir, true)?;
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700766 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800767
768 add_crosvm_user_to_jail(&mut jail, "gpu")?;
769
David Riley54e660b2019-07-24 17:22:50 -0700770 // pvr driver requires read access to /proc/self/task/*/comm.
771 let proc_path = Path::new("/proc");
772 jail.mount(
773 proc_path,
774 proc_path,
775 "proc",
776 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
777 )?;
778
John Bates0d9d0e32020-12-03 11:37:33 -0800779 // To enable perfetto tracing, we need to give access to the perfetto service IPC
780 // endpoints.
781 let perfetto_path = Path::new("/run/perfetto");
782 if perfetto_path.exists() {
783 jail.mount_bind(perfetto_path, perfetto_path, true)?;
784 }
785
David Tolnay2b089fc2019-03-04 15:33:22 -0800786 Some(jail)
787 }
788 None => None,
789 };
790
791 Ok(VirtioDeviceStub {
792 dev: Box::new(dev),
793 jail,
794 })
795}
796
797fn create_wayland_device(
798 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800799 control_tube: Tube,
800 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800801) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900802 let wayland_socket_dirs = cfg
803 .wayland_socket_paths
804 .iter()
805 .map(|(_name, path)| path.parent())
806 .collect::<Option<Vec<_>>>()
807 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800808
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100809 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100810 let dev = virtio::Wl::new(
811 features,
812 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800813 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +0100814 resource_bridge,
815 )
816 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800817
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700818 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800819 Some(mut jail) => {
820 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
821 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
822 jail.mount_with_data(
823 Path::new("none"),
824 Path::new("/"),
825 "tmpfs",
826 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
827 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800828 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800829
830 // Bind mount the wayland socket's directory into jail's root. This is necessary since
831 // each new wayland context must open() the socket. If the wayland socket is ever
832 // destroyed and remade in the same host directory, new connections will be possible
833 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900834 for dir in &wayland_socket_dirs {
835 jail.mount_bind(dir, dir, true)?;
836 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800837 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
838
839 Some(jail)
840 }
841 None => None,
842 };
843
844 Ok(VirtioDeviceStub {
845 dev: Box::new(dev),
846 jail,
847 })
848}
849
/// Builds a virtio video device of the requested `typ` (decoder or encoder) together with the
/// jail it should run in, if any.
///
/// `resource_bridge` is passed to `devices::virtio::VideoDevice::new` wrapped in `Some`.
///
/// # Errors
///
/// Forwards any failure from creating the jail, adding the crosvm user to it, or setting up its
/// mounts.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = jail.as_mut() {
        // The feature name handed to add_crosvm_user_to_jail depends on the device flavour.
        let crosvm_user_feature = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, crosvm_user_feature)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // In order:
            //  * /sys/dev/char and /sys/devices: device nodes used by libdrm through minigbm in
            //    libvda on AMD devices.
            //  * /usr/lib64: required for loading dri libraries loaded by minigbm on AMD devices.
            for dir in &["/sys/dev/char", "/sys/devices", "/usr/lib64"] {
                let dir = Path::new(dir);
                jail.mount_bind(dir, dir, false)?;
            }
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let dbus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(dbus_socket, dbus_socket, true)?;
    }

    let dev = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
913
/// Creates a video device of type `typ` that uses `video_tube` as its resource bridge and appends
/// it to `devs`.
///
/// # Errors
///
/// Forwards any error from `create_video_device`; `devs` is left untouched in that case.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let video_device = create_video_device(cfg, typ, video_tube)?;
    devs.push(video_device);
    Ok(())
}
924
David Tolnay2b089fc2019-03-04 15:33:22 -0800925fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100926 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100927 let dev = virtio::vhost::Vsock::new(&cfg.vhost_vsock_device_path, features, cid, mem)
928 .map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800929
930 Ok(VirtioDeviceStub {
931 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700932 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800933 })
934}
935
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900936fn create_fs_device(
937 cfg: &Config,
938 uid_map: &str,
939 gid_map: &str,
940 src: &Path,
941 tag: &str,
942 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800943 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900944) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900945 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800946 let j = if cfg.sandbox {
947 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
948 let config = SandboxConfig {
949 limit_caps: false,
950 uid_map: Some(uid_map),
951 gid_map: Some(gid_map),
952 log_failures: cfg.seccomp_log_failures,
953 seccomp_policy: &seccomp_policy,
954 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +0900955 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
956 // We want bind mounts from the parent namespaces to propagate into the fs device's
957 // namespace.
958 jail.set_remount_mode(libc::MS_SLAVE);
959
960 jail
Matt Delcoc24ad782020-02-14 13:24:36 -0800961 } else {
962 create_base_minijail(src, Some(max_open_files), None)?
963 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900964
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100965 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900966 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
967 // when num_queues > 1.
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900968 let dev =
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800969 virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900970
971 Ok(VirtioDeviceStub {
972 dev: Box::new(dev),
973 jail: Some(j),
974 })
975}
976
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900977fn create_9p_device(
978 cfg: &Config,
979 uid_map: &str,
980 gid_map: &str,
981 src: &Path,
982 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900983 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900984) -> DeviceResult {
985 let max_open_files = get_max_open_files()?;
986 let (jail, root) = if cfg.sandbox {
987 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
988 let config = SandboxConfig {
989 limit_caps: false,
990 uid_map: Some(uid_map),
991 gid_map: Some(gid_map),
992 log_failures: cfg.seccomp_log_failures,
993 seccomp_policy: &seccomp_policy,
994 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800995
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900996 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
997 // We want bind mounts from the parent namespaces to propagate into the 9p server's
998 // namespace.
999 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001000
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001001 // The shared directory becomes the root of the device's file system.
1002 let root = Path::new("/");
1003 (Some(jail), root)
1004 } else {
1005 // There's no mount namespace so we tell the server to treat the source directory as the
1006 // root.
1007 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001008 };
1009
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001010 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001011 p9_cfg.root = root.into();
1012 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001013
1014 Ok(VirtioDeviceStub {
1015 dev: Box::new(dev),
1016 jail,
1017 })
1018}
1019
Jakub Starona3411ea2019-04-24 10:55:25 -07001020fn create_pmem_device(
1021 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001022 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001023 resources: &mut SystemAllocator,
1024 disk: &DiskOption,
1025 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001026 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001027) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001028 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Andrew Walbranbc55e302021-07-13 17:35:10 +01001029 .map_err(|e| Error::Disk(disk.path.clone(), e.into()))?;
Iliyan Malcheved149862020-04-17 23:57:47 +00001030 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001031 let metadata =
1032 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001033 let disk_len = metadata.len();
1034 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1035 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1036 // we just align the size of the file to 2 MiB then access beyond the last page of the
1037 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1038 // padding up to 2 MiB.
1039 let alignment = 2 * 1024 * 1024;
1040 let align_adjust = if disk_len % alignment != 0 {
1041 alignment - (disk_len % alignment)
1042 } else {
1043 0
1044 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001045 disk_len
1046 .checked_add(align_adjust)
1047 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001048 };
1049
1050 let protection = {
1051 if disk.read_only {
1052 Protection::read()
1053 } else {
1054 Protection::read_write()
1055 }
1056 };
1057
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001058 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001059 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001060 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001061
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001062 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1063 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001064 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001065 .map_err(Error::ReservePmemMemory)?;
1066 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001067 };
1068
1069 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001070 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001071 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001072 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001073 Alloc::PmemDevice(index),
1074 format!("pmem_disk_image_{}", index),
1075 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1076 128 * 1024 * 1024, /* 128 MiB */
1077 )
1078 .map_err(Error::AllocatePmemDeviceAddress)?;
1079
Daniel Verkampe1980a92020-02-07 11:00:55 -08001080 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001081 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001082 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001083 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001084 /* read_only = */ disk.read_only,
1085 /* log_dirty_pages = */ false,
1086 )
1087 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001088
Daniel Verkampe1980a92020-02-07 11:00:55 -08001089 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001090 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001091 fd,
1092 GuestAddress(mapping_address),
1093 slot,
1094 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001095 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001096 )
1097 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001098
1099 Ok(VirtioDeviceStub {
1100 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001101 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001102 })
1103}
1104
Zide Chendfc4b882021-03-10 16:35:37 -08001105fn create_iommu_device(
1106 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001107 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001108 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1109) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001110 let dev = virtio::Iommu::new(
1111 virtio::base_features(cfg.protected_vm),
1112 endpoints,
1113 phys_max_addr,
1114 )
1115 .map_err(Error::CreateVirtioIommu)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001116
1117 Ok(VirtioDeviceStub {
1118 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001119 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001120 })
1121}
1122
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001123fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001124 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001125 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001126 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001127 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001128 .map_err(Error::CreateConsole)?;
1129
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001130 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001131 Some(mut jail) => {
1132 // Create a tmpfs in the device's root directory so that we can bind mount the
1133 // log socket directory into it.
1134 // The size=67108864 is size=64*1024*1024 or size=64MB.
1135 jail.mount_with_data(
1136 Path::new("none"),
1137 Path::new("/"),
1138 "tmpfs",
1139 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1140 "size=67108864",
1141 )?;
1142 add_crosvm_user_to_jail(&mut jail, "serial")?;
1143 let res = param.add_bind_mounts(&mut jail);
1144 if res.is_err() {
1145 error!("failed to add bind mounts for console device");
1146 }
1147 Some(jail)
1148 }
1149 None => None,
1150 };
1151
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001152 Ok(VirtioDeviceStub {
1153 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001154 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001155 })
1156}
1157
/// Builds the virtio sound device from `path` via `virtio::new_sound`, together with its jail,
/// if any.
///
/// # Errors
///
/// Returns `Error::SoundDeviceNew` if the device cannot be constructed and forwards any failure
/// from `simple_jail`.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let dev = virtio::new_sound(path, features).map_err(Error::SoundDeviceNew)?;
    let dev = Box::new(dev);

    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub { dev, jail })
}
1168
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001169// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001170#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001171fn create_virtio_devices(
1172 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001173 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001174 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001175 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001176 wayland_device_tube: Tube,
1177 gpu_device_tube: Tube,
1178 balloon_device_tube: Tube,
1179 disk_device_tubes: &mut Vec<Tube>,
1180 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001181 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001182 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001183) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001184 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001185
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001186 for (_, param) in cfg
1187 .serial_parameters
1188 .iter()
1189 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1190 {
1191 let dev = create_console_device(cfg, param)?;
1192 devs.push(dev);
1193 }
1194
Zach Reizner8fb52112017-12-13 16:04:39 -08001195 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001196 let disk_device_tube = disk_device_tubes.remove(0);
1197 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001198 }
1199
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001200 for blk in &cfg.vhost_user_blk {
1201 devs.push(create_vhost_user_block_device(cfg, blk)?);
1202 }
1203
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +09001204 for console in &cfg.vhost_user_console {
1205 devs.push(create_vhost_user_console_device(cfg, console)?);
1206 }
1207
Jakub Starona3411ea2019-04-24 10:55:25 -07001208 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001209 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001210 devs.push(create_pmem_device(
1211 cfg,
1212 vm,
1213 resources,
1214 pmem_disk,
1215 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001216 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001217 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001218 }
1219
David Tolnay2b089fc2019-03-04 15:33:22 -08001220 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001221
David Tolnayde6b29a2018-12-20 11:49:46 -08001222 #[cfg(feature = "tpm")]
1223 {
David Tolnay43f8e212019-02-13 17:28:16 -08001224 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001225 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001226 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001227 }
1228
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001229 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
1230 devs.push(create_single_touch_device(
1231 cfg,
1232 single_touch_spec,
1233 idx as u32,
1234 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001235 }
1236
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001237 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
1238 devs.push(create_multi_touch_device(
1239 cfg,
1240 multi_touch_spec,
1241 idx as u32,
1242 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001243 }
1244
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001245 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
1246 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001247 }
1248
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001249 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
1250 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001251 }
1252
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001253 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
1254 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001255 }
1256
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001257 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
1258 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -08001259 }
1260
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001261 for dev_path in &cfg.virtio_input_evdevs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001262 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001263 }
1264
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001265 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001266
Zach Reizner39aa26b2017-12-12 18:03:23 -08001267 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001268 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001269 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001270 }
1271
David Tolnay2b089fc2019-03-04 15:33:22 -08001272 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1273 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1274 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001275 if !cfg.vhost_user_net.is_empty() {
1276 return Err(Error::VhostUserNetWithNetArgs);
1277 }
Zach Reiznerdc748482021-04-14 13:59:30 -07001278 devs.push(create_net_device(
1279 cfg,
1280 host_ip,
1281 netmask,
1282 mac_address,
1283 vm.get_memory(),
1284 )?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001285 }
1286
Keiichi Watanabe60686582021-03-12 04:53:51 +09001287 for net in &cfg.vhost_user_net {
1288 devs.push(create_vhost_user_net_device(cfg, net)?);
1289 }
1290
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001291 for opt in &cfg.vhost_user_wl {
1292 devs.push(create_vhost_user_wl_device(cfg, opt)?);
1293 }
1294
David Tolnayfa701712019-02-13 16:42:54 -08001295 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001296 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001297
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001298 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001299 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001300 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001301
1302 #[cfg(feature = "gpu")]
1303 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001304 if cfg.gpu_parameters.is_some() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001305 let (wl_socket, gpu_socket) = Tube::pair().map_err(Error::CreateTube)?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001306 resource_bridges.push(gpu_socket);
1307 wl_resource_bridge = Some(wl_socket);
1308 }
1309 }
1310
1311 devs.push(create_wayland_device(
1312 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001313 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001314 wl_resource_bridge,
1315 )?);
1316 }
David Tolnayfa701712019-02-13 16:42:54 -08001317
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001318 #[cfg(feature = "video-decoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001319 let video_dec_tube = if cfg.video_dec {
1320 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1321 resource_bridges.push(gpu_tube);
1322 Some(video_tube)
1323 } else {
1324 None
1325 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001326
1327 #[cfg(feature = "video-encoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001328 let video_enc_tube = if cfg.video_enc {
1329 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1330 resource_bridges.push(gpu_tube);
1331 Some(video_tube)
1332 } else {
1333 None
1334 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001335
Zach Reizner3a8100a2017-09-13 19:15:43 -07001336 #[cfg(feature = "gpu")]
1337 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001338 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Jason Macnakd659a0d2021-03-15 15:33:01 -07001339 let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
1340 let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
1341 if !gpu_parameters.displays.is_empty() {
1342 gpu_display_w = gpu_parameters.displays[0].width;
1343 gpu_display_h = gpu_parameters.displays[0].height;
1344 }
1345
Zach Reizner65b98f12019-11-22 17:34:58 -08001346 let mut event_devices = Vec::new();
1347 if cfg.display_window_mouse {
1348 let (event_device_socket, virtio_dev_socket) =
1349 UnixStream::pair().map_err(Error::CreateSocket)?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001350 let (multi_touch_width, multi_touch_height) = cfg
1351 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001352 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001353 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001354 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -07001355 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001356 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001357 // u32::MAX is the least likely to collide with the indices generated above for
1358 // the multi_touch options, which begin at 0.
1359 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001360 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001361 multi_touch_width,
1362 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001363 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001364 )
1365 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001366 devs.push(VirtioDeviceStub {
1367 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001368 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001369 });
1370 event_devices.push(EventDevice::touchscreen(event_device_socket));
1371 }
1372 if cfg.display_window_keyboard {
1373 let (event_device_socket, virtio_dev_socket) =
1374 UnixStream::pair().map_err(Error::CreateSocket)?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001375 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001376 // u32::MAX is the least likely to collide with the indices generated above for
1377 // the multi_touch options, which begin at 0.
1378 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001379 virtio_dev_socket,
1380 virtio::base_features(cfg.protected_vm),
1381 )
1382 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001383 devs.push(VirtioDeviceStub {
1384 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001385 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001386 });
1387 event_devices.push(EventDevice::keyboard(event_device_socket));
1388 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001389 devs.push(create_gpu_device(
1390 cfg,
1391 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001392 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001393 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001394 // Use the unnamed socket for GPU display screens.
1395 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001396 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001397 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001398 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001399 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001400 }
1401 }
1402
Daniel Verkampffb59122021-03-18 14:06:15 -07001403 #[cfg(feature = "video-decoder")]
1404 {
1405 if let Some(video_dec_tube) = video_dec_tube {
1406 register_video_device(
1407 &mut devs,
1408 video_dec_tube,
1409 cfg,
1410 devices::virtio::VideoDeviceType::Decoder,
1411 )?;
1412 }
1413 }
1414
1415 #[cfg(feature = "video-encoder")]
1416 {
1417 if let Some(video_enc_tube) = video_enc_tube {
1418 register_video_device(
1419 &mut devs,
1420 video_enc_tube,
1421 cfg,
1422 devices::virtio::VideoDeviceType::Encoder,
1423 )?;
1424 }
1425 }
1426
Zach Reizneraa575662018-08-15 10:46:32 -07001427 if let Some(cid) = cfg.cid {
Zach Reiznerdc748482021-04-14 13:59:30 -07001428 devs.push(create_vhost_vsock_device(cfg, cid, vm.get_memory())?);
Zach Reizneraa575662018-08-15 10:46:32 -07001429 }
1430
Woody Chow5890b702021-02-12 14:57:02 +09001431 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001432 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +09001433 }
1434
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001435 for shared_dir in &cfg.shared_dirs {
1436 let SharedDir {
1437 src,
1438 tag,
1439 kind,
1440 uid_map,
1441 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001442 fs_cfg,
1443 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001444 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001445
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001446 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001447 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001448 let device_tube = fs_device_tubes.remove(0);
1449 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001450 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001451 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001452 };
1453 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001454 }
1455
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001456 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
1457 devs.push(create_vhost_user_mac80211_hwsim_device(
1458 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001459 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001460 )?);
1461 }
1462
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001463 #[cfg(feature = "audio")]
1464 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001465 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001466 }
1467
David Tolnay2b089fc2019-03-04 15:33:22 -08001468 Ok(devs)
1469}
1470
Xiong Zhang10f15052021-04-08 17:23:33 +08001471fn create_vfio_device(
1472 cfg: &Config,
1473 vm: &impl Vm,
1474 resources: &mut SystemAllocator,
1475 control_tubes: &mut Vec<TaggedControlTube>,
1476 vfio_path: &Path,
Xiong Zhange19ab752021-05-20 18:18:46 +08001477 hotplug: bool,
Zide Chendfc4b882021-03-10 16:35:37 -08001478 endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1479 iommu_enabled: bool,
Xiong Zhang10f15052021-04-08 17:23:33 +08001480) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Zide Chendfc4b882021-03-10 16:35:37 -08001481 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
1482 .map_err(Error::CreateVfioDevice)?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001483
1484 // create MSI, MSI-X, and Mem request sockets for each vfio device
1485 let (vfio_host_tube_msi, vfio_device_tube_msi) = Tube::pair().map_err(Error::CreateTube)?;
1486 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1487
1488 let (vfio_host_tube_msix, vfio_device_tube_msix) = Tube::pair().map_err(Error::CreateTube)?;
1489 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1490
1491 let (vfio_host_tube_mem, vfio_device_tube_mem) = Tube::pair().map_err(Error::CreateTube)?;
1492 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1493
Xiong Zhange19ab752021-05-20 18:18:46 +08001494 // put hotplug vfio device on Bus#1 temporary
1495 let bus_num = if hotplug { Some(1) } else { None };
Xiong Zhangdea7dbb2021-07-26 14:49:03 +08001496 let vfio_device = VfioDevice::new(vfio_path, vm, vfio_container.clone(), iommu_enabled)
1497 .map_err(Error::CreateVfioDevice)?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001498 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1499 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001500 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001501 vfio_device_tube_msi,
1502 vfio_device_tube_msix,
1503 vfio_device_tube_mem,
1504 ));
1505 // early reservation for pass-through PCI devices.
Zide Chendfc4b882021-03-10 16:35:37 -08001506 let endpoint_addr = vfio_pci_device.allocate_address(resources);
1507 if endpoint_addr.is_err() {
Xiong Zhang10f15052021-04-08 17:23:33 +08001508 warn!(
1509 "address reservation failed for vfio {}",
1510 vfio_pci_device.debug_label()
1511 );
1512 }
1513
Zide Chendfc4b882021-03-10 16:35:37 -08001514 if iommu_enabled {
1515 endpoints.insert(endpoint_addr.unwrap().to_u32(), vfio_container);
1516 }
1517
Xiong Zhang10f15052021-04-08 17:23:33 +08001518 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1519}
1520
David Tolnay2b089fc2019-03-04 15:33:22 -08001521fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001522 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001523 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001524 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001525 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001526 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001527 control_tubes: &mut Vec<TaggedControlTube>,
1528 wayland_device_tube: Tube,
1529 gpu_device_tube: Tube,
1530 balloon_device_tube: Tube,
1531 disk_device_tubes: &mut Vec<Tube>,
1532 pmem_device_tubes: &mut Vec<Tube>,
1533 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001534 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001535 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001536) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001537 let stubs = create_virtio_devices(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001538 cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001539 vm,
1540 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001541 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001542 wayland_device_tube,
1543 gpu_device_tube,
1544 balloon_device_tube,
1545 disk_device_tubes,
1546 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001547 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001548 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001549 )?;
1550
1551 let mut pci_devices = Vec::new();
1552
1553 for stub in stubs {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001554 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1555 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001556 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkampbb712d62019-11-19 09:47:33 -08001557 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001558 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001559 pci_devices.push((dev, stub.jail));
1560 }
1561
Andrew Scull1590e6f2020-03-18 18:00:47 +00001562 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001563 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001564 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
1565 .map_err(Error::CreateAc97)?;
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001566 let jail = simple_jail(cfg, dev.minijail_policy())?;
paulhsiace17e6e2020-08-28 18:37:45 +08001567 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001568 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001569
Daniel Verkampf1439d42021-05-21 13:55:10 -07001570 #[cfg(feature = "usb")]
1571 {
1572 // Create xhci controller.
1573 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001574 pci_devices.push((usb_controller, simple_jail(cfg, "xhci")?));
Daniel Verkampf1439d42021-05-21 13:55:10 -07001575 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001576
Xiong Zhang262e6182021-05-18 14:58:07 +08001577 // Create Pcie Root Port
1578 let pcie_root_port = Box::new(PcieRootPort::new());
1579 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1580 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1581 let pci_bridge = Box::new(PciBridge::new(pcie_root_port, msi_device_tube));
1582 // pcie root port is used in hotplug process only, so disable sandbox for it
1583 pci_devices.push((pci_bridge, None));
1584
Zide Chen5deee482021-04-19 11:06:01 -07001585 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001586 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1587 BTreeMap::new();
1588
1589 for (vfio_path, enable_iommu) in cfg.vfio.iter() {
Zide Chen5deee482021-04-19 11:06:01 -07001590 let (vfio_pci_device, jail) = create_vfio_device(
1591 cfg,
1592 vm,
1593 resources,
1594 control_tubes,
1595 vfio_path.as_path(),
Xiong Zhange19ab752021-05-20 18:18:46 +08001596 false,
Zide Chendfc4b882021-03-10 16:35:37 -08001597 &mut iommu_attached_endpoints,
1598 *enable_iommu,
Zide Chen5deee482021-04-19 11:06:01 -07001599 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001600
Zide Chen5deee482021-04-19 11:06:01 -07001601 pci_devices.push((vfio_pci_device, jail));
1602 }
Zide Chendfc4b882021-03-10 16:35:37 -08001603
1604 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08001605 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08001606
1607 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1608 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07001609 let mut dev =
1610 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
1611 .map_err(Error::VirtioPciDev)?;
1612 // early reservation for viommu.
1613 dev.allocate_address(resources)
1614 .map_err(|_| Error::VirtioPciDev(base::Error::new(EINVAL)))?;
1615 let dev = Box::new(dev);
Zide Chendfc4b882021-03-10 16:35:37 -08001616 pci_devices.push((dev, iommu_dev.jail));
1617 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001618 }
1619
David Tolnay2b089fc2019-03-04 15:33:22 -08001620 Ok(pci_devices)
1621}
1622
/// A user id / group id pair, as returned by `add_crosvm_user_to_jail`.
#[derive(Copy, Clone)]
// NOTE(review): the fields are presumably only read when the "tpm" feature is enabled, hence
// the conditional `dead_code` allowance — confirm against the users of this struct.
#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
struct Ids {
    /// User id.
    uid: uid_t,
    /// Group id.
    gid: gid_t,
}
1629
David Tolnay48c48292019-03-01 16:54:25 -08001630// Set the uid/gid for the jailed process and give a basic id map. This is
1631// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001632fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001633 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1634
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001635 let crosvm_uid = match get_user_id(crosvm_user_group) {
David Tolnay48c48292019-03-01 16:54:25 -08001636 Ok(u) => u,
1637 Err(e) => {
1638 warn!("falling back to current user id for {}: {}", feature, e);
1639 geteuid()
1640 }
1641 };
1642
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001643 let crosvm_gid = match get_group_id(crosvm_user_group) {
David Tolnay48c48292019-03-01 16:54:25 -08001644 Ok(u) => u,
1645 Err(e) => {
1646 warn!("falling back to current group id for {}: {}", feature, e);
1647 getegid()
1648 }
1649 };
1650
1651 jail.change_uid(crosvm_uid);
1652 jail.change_gid(crosvm_gid);
1653 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1654 .map_err(Error::SettingUidMap)?;
1655 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1656 .map_err(Error::SettingGidMap)?;
1657
David Tolnay41a6f842019-03-01 16:18:44 -08001658 Ok(Ids {
1659 uid: crosvm_uid,
1660 gid: crosvm_gid,
1661 })
David Tolnay48c48292019-03-01 16:54:25 -08001662}
1663
/// A fallible, consuming conversion into a `UnixStream`.
trait IntoUnixStream {
    /// Consumes `self` and returns the stream it was converted into.
    fn into_unix_stream(self) -> Result<UnixStream>;
}
1667
1668impl<'a> IntoUnixStream for &'a Path {
1669 fn into_unix_stream(self) -> Result<UnixStream> {
Andrew Walbranbc55e302021-07-13 17:35:10 +01001670 if let Some(fd) =
1671 safe_descriptor_from_path(self).map_err(|e| Error::InputEventsOpen(e.into()))?
1672 {
1673 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08001674 } else {
1675 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1676 }
1677 }
1678}
1679impl<'a> IntoUnixStream for &'a PathBuf {
1680 fn into_unix_stream(self) -> Result<UnixStream> {
1681 self.as_path().into_unix_stream()
1682 }
1683}
1684
impl IntoUnixStream for UnixStream {
    /// A stream is already a stream: returns `self` unchanged and never fails.
    fn into_unix_stream(self) -> Result<UnixStream> {
        Ok(self)
    }
}
1690
Steven Richmanf32d0b42020-06-20 21:45:32 -07001691fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1692 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001693 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001694 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001695 // Our signal handler does nothing and is trivially async signal safe.
1696 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1697 .map_err(Error::RegisterSignalHandler)?;
1698 }
1699 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1700 } else {
1701 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001702 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001703 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001704 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001705 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001706 .map_err(Error::RegisterSignalHandler)?;
1707 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001708 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001709 Ok(())
1710}
1711
Steven Richmanf32d0b42020-06-20 21:45:32 -07001712// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001713fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001714 cpu_id: usize,
1715 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001716 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07001717 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001718 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001719 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001720 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001721 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001722 has_bios: bool,
1723 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001724) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001725where
Zach Reizner2c770e62020-09-30 16:49:59 -07001726 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001727{
Zach Reizner304e7312020-09-29 16:00:24 -07001728 let mut vcpu = match vcpu {
1729 Some(v) => v,
1730 None => {
1731 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1732 // the vcpu thread.
1733 match vm
1734 .create_vcpu(cpu_id)
1735 .map_err(Error::CreateVcpu)?
1736 .downcast::<V>()
1737 {
1738 Ok(v) => *v,
1739 Err(_) => panic!("VM created wrong type of VCPU"),
1740 }
1741 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001742 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001743
Steven Richmanf32d0b42020-06-20 21:45:32 -07001744 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001745 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001746 .map_err(Error::AddIrqChipVcpu)?;
1747
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001748 if !vcpu_affinity.is_empty() {
1749 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1750 error!("Failed to set CPU affinity: {}", e);
1751 }
1752 }
1753
Steven Richmanf32d0b42020-06-20 21:45:32 -07001754 Arch::configure_vcpu(
1755 vm.get_memory(),
1756 vm.get_hypervisor(),
1757 irq_chip,
1758 &mut vcpu,
1759 cpu_id,
1760 vcpu_count,
1761 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001762 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001763 )
1764 .map_err(Error::ConfigureVcpu)?;
1765
Zach Reizner026f72f2021-06-01 14:35:29 -07001766 if let Err(e) = enable_core_scheduling() {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001767 error!("Failed to enable core scheduling: {}", e);
1768 }
1769
Kansho Nishidaab205af2020-08-13 18:17:50 +09001770 if run_rt {
1771 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1772 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1773 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1774 {
1775 warn!("Failed to set vcpu to real time: {}", e);
1776 }
1777 }
1778
Steven Richmanf32d0b42020-06-20 21:45:32 -07001779 if use_hypervisor_signals {
1780 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1781 v.retain(|&x| x != SIGRTMIN() + 0);
1782 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1783 }
1784
Zach Reizner2c770e62020-09-30 16:49:59 -07001785 let vcpu_run_handle = vcpu
1786 .take_run_handle(Some(SIGRTMIN() + 0))
1787 .map_err(Error::RunnableVcpu)?;
1788
1789 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001790}
1791
/// Executes the debug command `d` on `vcpu` and sends the outcome to `reply_tube`.
///
/// Every command is answered with one `VcpuDebugStatusMessage` tagged with `cpu_id`:
/// * `ReadRegs` replies with the register values;
/// * `ReadMem` replies with the bytes that were read, or with an empty buffer if the read
///   failed;
/// * `WriteRegs`, `WriteMem`, `EnableSinglestep` and `SetHwBreakPoint` reply with
///   `CommandComplete`.
///
/// # Errors
///
/// Returns `Error::HandleDebugCommand` if the command itself fails (except for `ReadMem`, see
/// above); no reply is sent in that case. Returns `Error::SendDebugStatus` if the reply cannot
/// be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // Run the command first; each arm yields the status to report back.
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).map_err(Error::HandleDebugCommand)?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            // A failed read is reported as an empty region rather than as an error.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .map_err(|e| Error::SendDebugStatus(Box::new(e)))
}
1867
Zach Reizner2c770e62020-09-30 16:49:59 -07001868fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001869 cpu_id: usize,
1870 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001871 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07001872 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001873 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001874 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001875 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001876 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001877 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001878 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001879 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001880 io_bus: devices::Bus,
1881 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07001882 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001883 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001884 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001885 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001886 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001887 mpsc::Sender<VcpuDebugStatusMessage>,
1888 >,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001889) -> Result<JoinHandle<()>>
1890where
Zach Reizner2c770e62020-09-30 16:49:59 -07001891 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001892{
Zach Reizner8fb52112017-12-13 16:04:39 -08001893 thread::Builder::new()
1894 .name(format!("crosvm_vcpu{}", cpu_id))
1895 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08001896 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
1897 // implementation accomplishes that.
1898 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
1899
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001900 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1901 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07001902 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001903 cpu_id,
1904 vcpu,
1905 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07001906 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001907 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001908 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001909 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001910 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001911 has_bios,
1912 use_hypervisor_signals,
1913 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001914
Zach Reizner8fb52112017-12-13 16:04:39 -08001915 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001916
Zach Reizner2c770e62020-09-30 16:49:59 -07001917 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001918 Ok(v) => v,
1919 Err(e) => {
1920 error!("failed to start vcpu {}: {}", cpu_id, e);
1921 return;
1922 }
1923 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001924
Dylan Reidb0492662019-05-17 14:50:13 -07001925 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001926 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001927 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001928 // Wait until a GDB client attaches
1929 run_mode = VmRunMode::Breakpoint;
1930 }
1931
Dylan Reidb0492662019-05-17 14:50:13 -07001932 let mut interrupted_by_signal = false;
1933
1934 'vcpu_loop: loop {
1935 // Start by checking for messages to process and the run state of the CPU.
1936 // An extra check here for Running so there isn't a need to call recv unless a
1937 // message is likely to be ready because a signal was sent.
1938 if interrupted_by_signal || run_mode != VmRunMode::Running {
1939 'state_loop: loop {
1940 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001941 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07001942 Ok(m) => m,
1943 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
1944 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001945 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07001946 break 'state_loop;
1947 }
1948 Err(mpsc::TryRecvError::Empty) => {
1949 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001950 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07001951 Ok(m) => m,
1952 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001953 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07001954 break 'vcpu_loop;
1955 }
1956 }
1957 }
1958 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001959 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07001960 break 'vcpu_loop;
1961 }
1962 };
1963
1964 // Collect all pending messages.
1965 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001966 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07001967
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001968 for msg in messages {
1969 match msg {
1970 VcpuControl::RunState(new_mode) => {
1971 run_mode = new_mode;
1972 match run_mode {
1973 VmRunMode::Running => break 'state_loop,
1974 VmRunMode::Suspending => {
1975 // On KVM implementations that use a paravirtualized
1976 // clock (e.g. x86), a flag must be set to indicate to
1977 // the guest kernel that a vCPU was suspended. The guest
1978 // kernel will use this flag to prevent the soft lockup
1979 // detection from triggering when this vCPU resumes,
1980 // which could happen days later in realtime.
1981 if requires_pvclock_ctrl {
1982 if let Err(e) = vcpu.pvclock_ctrl() {
1983 error!(
1984 "failed to tell hypervisor vcpu {} is suspending: {}",
1985 cpu_id, e
1986 );
1987 }
1988 }
1989 }
1990 VmRunMode::Breakpoint => {}
1991 VmRunMode::Exiting => break 'vcpu_loop,
1992 }
1993 }
1994 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1995 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001996 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001997 Some(ref ch) => {
1998 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001999 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002000 ) {
2001 error!("Failed to handle gdb message: {}", e);
2002 }
2003 },
2004 None => {
2005 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002006 }
2007 }
2008 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002009 VcpuControl::MakeRT => {
2010 if run_rt && delay_rt {
2011 info!("Making vcpu {} RT\n", cpu_id);
2012 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2013 if let Err(e) = set_rt_prio_limit(
2014 u64::from(DEFAULT_VCPU_RT_LEVEL))
2015 .and_then(|_|
2016 set_rt_round_robin(
2017 i32::from(DEFAULT_VCPU_RT_LEVEL)
2018 ))
2019 {
2020 warn!("Failed to set vcpu to real time: {}", e);
2021 }
2022 }
2023 }
Dylan Reidb0492662019-05-17 14:50:13 -07002024 }
2025 }
2026 }
2027 }
2028
2029 interrupted_by_signal = false;
2030
Steven Richman11dc6712020-09-02 15:39:14 -07002031 // Vcpus may have run a HLT instruction, which puts them into a state other than
2032 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2033 // until either the irqchip receives an interrupt for this vcpu, or until the main
2034 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2035 // implementations HLT instructions do not make it to crosvm, and thus this is a
2036 // no-op that always returns VcpuRunState::Runnable.
2037 match irq_chip.wait_until_runnable(&vcpu) {
2038 Ok(VcpuRunState::Runnable) => {}
2039 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2040 Err(e) => error!(
2041 "error waiting for vcpu {} to become runnable: {}",
2042 cpu_id, e
2043 ),
2044 }
2045
2046 if !interrupted_by_signal {
2047 match vcpu.run(&vcpu_run_handle) {
2048 Ok(VcpuExit::IoIn { port, mut size }) => {
2049 let mut data = [0; 8];
2050 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002051 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002052 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002053 }
Steven Richman11dc6712020-09-02 15:39:14 -07002054 io_bus.read(port as u64, &mut data[..size]);
2055 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002056 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002057 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002058 }
Steven Richman11dc6712020-09-02 15:39:14 -07002059 Ok(VcpuExit::IoOut {
2060 port,
2061 mut size,
2062 data,
2063 }) => {
2064 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002065 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002066 size = data.len();
2067 }
2068 io_bus.write(port as u64, &data[..size]);
2069 }
2070 Ok(VcpuExit::MmioRead { address, size }) => {
2071 let mut data = [0; 8];
2072 mmio_bus.read(address, &mut data[..size]);
2073 // Setting data for mmio can not fail.
2074 let _ = vcpu.set_data(&data[..size]);
2075 }
2076 Ok(VcpuExit::MmioWrite {
2077 address,
2078 size,
2079 data,
2080 }) => {
2081 mmio_bus.write(address, &data[..size]);
2082 }
2083 Ok(VcpuExit::IoapicEoi { vector }) => {
2084 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2085 error!(
2086 "failed to broadcast eoi {} on vcpu {}: {}",
2087 vector, cpu_id, e
2088 );
2089 }
2090 }
2091 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002092 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002093 Ok(VcpuExit::Shutdown) => break,
2094 Ok(VcpuExit::FailEntry {
2095 hardware_entry_failure_reason,
2096 }) => {
2097 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002098 break;
2099 }
Steven Richman11dc6712020-09-02 15:39:14 -07002100 Ok(VcpuExit::SystemEvent(_, _)) => break,
2101 Ok(VcpuExit::Debug { .. }) => {
2102 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2103 {
2104 let msg = VcpuDebugStatusMessage {
2105 cpu: cpu_id as usize,
2106 msg: VcpuDebugStatus::HitBreakPoint,
2107 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002108 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002109 if let Err(e) = ch.send(msg) {
2110 error!("failed to notify breakpoint to GDB thread: {}", e);
2111 break;
2112 }
2113 }
2114 run_mode = VmRunMode::Breakpoint;
2115 }
2116 }
2117 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2118 Err(e) => match e.errno() {
2119 libc::EINTR => interrupted_by_signal = true,
2120 libc::EAGAIN => {}
2121 _ => {
2122 error!("vcpu hit unknown error: {}", e);
2123 break;
2124 }
2125 },
2126 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002127 }
2128
2129 if interrupted_by_signal {
2130 if use_hypervisor_signals {
2131 // Try to clear the signal that we use to kick VCPU if it is pending before
2132 // attempting to handle pause requests.
2133 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2134 error!("failed to clear pending signal: {}", e);
2135 break;
2136 }
2137 } else {
2138 vcpu.set_immediate_exit(false);
2139 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002140 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002141
Steven Richman11dc6712020-09-02 15:39:14 -07002142 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2143 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2144 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002145 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002146 })
2147 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002148}
2149
Zach Reiznera90649a2021-03-31 12:56:08 -07002150fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002151 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002152 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002153 open_file(
2154 initrd_path,
2155 true, /*read_only*/
2156 false, /*O_DIRECT*/
2157 )
2158 .map_err(|e| Error::OpenInitrd(initrd_path.to_owned(), e.into()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002159 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002160 } else {
2161 None
2162 };
2163
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002164 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002165 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002166 open_file(
2167 kernel_path,
2168 true, /*read_only*/
2169 false, /*O_DIRECT*/
2170 )
2171 .map_err(|e| Error::OpenKernel(kernel_path.to_owned(), e.into()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002172 ),
2173 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002174 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Andrew Walbranbc55e302021-07-13 17:35:10 +01002175 .map_err(|e| Error::OpenBios(bios_path.to_owned(), e.into()))?,
2176 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002177 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2178 };
2179
Will Deaconc48e7832021-07-30 19:03:06 +01002180 let swiotlb = if let Some(size) = cfg.swiotlb {
2181 Some(
2182 size.checked_mul(1024 * 1024)
2183 .ok_or(Error::SwiotlbTooLarge)?,
2184 )
2185 } else {
2186 match cfg.protected_vm {
2187 ProtectionType::Protected => Some(64 * 1024 * 1024),
2188 ProtectionType::Unprotected => None,
2189 }
2190 };
2191
Zach Reiznera90649a2021-03-31 12:56:08 -07002192 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002193 memory_size: cfg
2194 .memory
2195 .unwrap_or(256)
2196 .checked_mul(1024 * 1024)
2197 .ok_or(Error::MemoryTooLarge)?,
Will Deaconc48e7832021-07-30 19:03:06 +01002198 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002199 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002200 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002201 cpu_clusters: cfg.cpu_clusters.clone(),
2202 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002203 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002204 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002205 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002206 android_fstab: cfg
2207 .android_fstab
2208 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002209 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002210 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002211 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002212 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002213 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002214 acpi_sdts: cfg
2215 .acpi_tables
2216 .iter()
2217 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2218 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002219 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002220 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002221 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002222 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002223 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002224 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002225 no_legacy: cfg.no_legacy,
Zach Reiznera90649a2021-03-31 12:56:08 -07002226 })
2227}
2228
Zach Reiznerdc748482021-04-14 13:59:30 -07002229pub fn run_config(cfg: Config) -> Result<()> {
2230 let components = setup_vm_components(&cfg)?;
2231
2232 let guest_mem_layout =
2233 Arch::guest_memory_layout(&components).map_err(Error::GuestMemoryLayout)?;
2234 let guest_mem = GuestMemory::new(&guest_mem_layout).map_err(Error::CreateGuestMemory)?;
2235 let mut mem_policy = MemoryPolicy::empty();
2236 if components.hugepages {
2237 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2238 }
2239 guest_mem.set_memory_policy(mem_policy);
2240 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).map_err(Error::CreateKvm)?;
2241 let vm = KvmVm::new(&kvm, guest_mem).map_err(Error::CreateVm)?;
2242 let vm_clone = vm.try_clone().map_err(Error::CreateVm)?;
2243
2244 enum KvmIrqChip {
2245 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2246 Split(KvmSplitIrqChip),
2247 Kernel(KvmKernelIrqChip),
2248 }
2249
2250 impl KvmIrqChip {
2251 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2252 match self {
2253 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2254 KvmIrqChip::Split(i) => i,
2255 KvmIrqChip::Kernel(i) => i,
2256 }
2257 }
2258 }
2259
2260 let ioapic_host_tube;
2261 let mut irq_chip = if cfg.split_irqchip {
2262 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2263 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2264 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2265 {
2266 let (host_tube, ioapic_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2267 ioapic_host_tube = Some(host_tube);
2268 KvmIrqChip::Split(
2269 KvmSplitIrqChip::new(
2270 vm_clone,
2271 components.vcpu_count,
2272 ioapic_device_tube,
2273 Some(120),
2274 )
2275 .map_err(Error::CreateIrqChip)?,
2276 )
2277 }
2278 } else {
2279 ioapic_host_tube = None;
2280 KvmIrqChip::Kernel(
2281 KvmKernelIrqChip::new(vm_clone, components.vcpu_count).map_err(Error::CreateIrqChip)?,
2282 )
2283 };
2284
2285 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2286}
2287
2288fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002289 cfg: Config,
2290 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002291 mut vm: V,
2292 irq_chip: &mut dyn IrqChipArch,
2293 ioapic_host_tube: Option<Tube>,
Zach Reiznera90649a2021-03-31 12:56:08 -07002294) -> Result<()>
2295where
2296 Vcpu: VcpuArch + 'static,
2297 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002298{
2299 if cfg.sandbox {
2300 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2301 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2302 // access to those files will not be possible.
2303 info!("crosvm entering multiprocess mode");
2304 }
2305
Daniel Verkampf1439d42021-05-21 13:55:10 -07002306 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002307 let (usb_control_tube, usb_provider) =
2308 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002309
Zach Reiznera90649a2021-03-31 12:56:08 -07002310 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2311 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2312 // quickly.
2313 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002314
Zach Reiznera60744b2019-02-13 17:33:32 -08002315 let control_server_socket = match &cfg.socket_path {
2316 Some(path) => Some(UnlinkUnixSeqpacketListener(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002317 UnixSeqpacketListener::bind(path).map_err(Error::CreateControlServer)?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002318 )),
2319 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002320 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002321
Zach Reiznera90649a2021-03-31 12:56:08 -07002322 let mut control_tubes = Vec::new();
2323
2324 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2325 if let Some(port) = cfg.gdb {
2326 // GDB needs a control socket to interrupt vcpus.
2327 let (gdb_host_tube, gdb_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
2328 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2329 components.gdb = Some((port, gdb_control_tube));
2330 }
2331
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002332 for wl_cfg in &cfg.vhost_user_wl {
2333 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2334 .map(Tube::new)
2335 .map_err(Error::ConnectTube)?;
2336 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2337 }
2338
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002339 let (wayland_host_tube, wayland_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2340 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002341 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002342 let (balloon_host_tube, balloon_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002343 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2344 balloon_host_tube
2345 .set_recv_timeout(Some(Duration::from_millis(100)))
2346 .map_err(Error::CreateTube)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002347
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002348 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002349 let mut disk_device_tubes = Vec::new();
2350 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002351 let disk_count = cfg.disks.len();
2352 for _ in 0..disk_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002353 let (disk_host_tub, disk_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2354 disk_host_tubes.push(disk_host_tub);
2355 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002356 }
2357
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002358 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002359 let pmem_count = cfg.pmem_devices.len();
2360 for _ in 0..pmem_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002361 let (pmem_host_tube, pmem_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2362 pmem_device_tubes.push(pmem_device_tube);
2363 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002364 }
2365
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002366 let (gpu_host_tube, gpu_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2367 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002368
Zach Reiznerdc748482021-04-14 13:59:30 -07002369 if let Some(ioapic_host_tube) = ioapic_host_tube {
2370 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2371 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002372
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002373 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002374 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002375 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002376 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002377 Some(mut jail) => {
2378 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2379 #[cfg(feature = "power-monitor-powerd")]
2380 {
2381 add_crosvm_user_to_jail(&mut jail, "battery")?;
2382
2383 // Create a tmpfs in the device's root directory so that we can bind mount files.
2384 jail.mount_with_data(
2385 Path::new("none"),
2386 Path::new("/"),
2387 "tmpfs",
2388 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2389 "size=67108864",
2390 )?;
2391
2392 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2393 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2394 }
2395 Some(jail)
2396 }
2397 None => None,
2398 };
2399 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002400 } else {
2401 (&cfg.battery_type, None)
2402 };
2403
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002404 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2405
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002406 let fs_count = cfg
2407 .shared_dirs
2408 .iter()
2409 .filter(|sd| sd.kind == SharedDirKind::FS)
2410 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002411 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002412 for _ in 0..fs_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002413 let (fs_host_tube, fs_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2414 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2415 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002416 }
2417
Zach Reiznerdc748482021-04-14 13:59:30 -07002418 let exit_evt = Event::new().map_err(Error::CreateEvent)?;
2419 let mut sys_allocator = Arch::create_system_allocator(vm.get_memory());
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002420
2421 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
2422 let ramoops_region = match &components.pstore {
2423 Some(pstore) => Some(
2424 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, &pstore)
2425 .map_err(Error::Pstore)?,
2426 ),
2427 None => None,
2428 };
2429
Zide Chen71435c12021-03-03 15:02:02 -08002430 let phys_max_addr = Arch::get_phys_max_addr();
Peter Fangc2bba082021-04-19 18:40:24 -07002431 let mut pci_devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07002432 &cfg,
2433 &mut vm,
2434 &mut sys_allocator,
2435 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08002436 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07002437 &mut control_tubes,
2438 wayland_device_tube,
2439 gpu_device_tube,
2440 balloon_device_tube,
2441 &mut disk_device_tubes,
2442 &mut pmem_device_tubes,
2443 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002444 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002445 usb_provider,
2446 Arc::clone(&map_request),
2447 )?;
2448
Peter Fangc2bba082021-04-19 18:40:24 -07002449 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2450 for (device, _jail) in pci_devices.iter_mut() {
2451 let sdts = device
2452 .generate_acpi(components.acpi_sdts)
2453 .or_else(|| {
2454 error!("ACPI table generation error");
2455 None
2456 })
2457 .ok_or(Error::GenerateAcpi)?;
2458 components.acpi_sdts = sdts;
2459 }
2460
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002461 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002462 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002463 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002464 &exit_evt,
2465 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002466 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002467 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002468 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002469 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002470 ramoops_region,
Zach Reiznerdc748482021-04-14 13:59:30 -07002471 pci_devices,
2472 irq_chip,
Trent Begin17ccaad2019-04-17 13:51:25 -06002473 )
David Tolnaybe034262019-03-04 17:48:36 -08002474 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002475
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002476 #[cfg(feature = "direct")]
2477 if let Some(pmio) = &cfg.direct_pmio {
2478 let direct_io =
2479 Arc::new(devices::DirectIo::new(&pmio.path, false).map_err(Error::DirectIo)?);
2480 for range in pmio.ranges.iter() {
2481 linux
2482 .io_bus
2483 .insert_sync(direct_io.clone(), range.0, range.1)
2484 .unwrap();
2485 }
2486 };
2487
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002488 #[cfg(feature = "direct")]
2489 let mut irqs = Vec::new();
2490
2491 #[cfg(feature = "direct")]
2492 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002493 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002494 warn!("irq {} already reserved.", irq);
2495 }
2496 let trigger = Event::new().map_err(Error::CreateEvent)?;
2497 let resample = Event::new().map_err(Error::CreateEvent)?;
2498 linux
2499 .irq_chip
2500 .register_irq_event(*irq, &trigger, Some(&resample))
2501 .unwrap();
2502 let direct_irq =
2503 devices::DirectIrq::new(trigger, Some(resample)).map_err(Error::DirectIrq)?;
2504 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2505 irqs.push(direct_irq);
2506 }
2507
2508 #[cfg(feature = "direct")]
2509 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002510 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002511 warn!("irq {} already reserved.", irq);
2512 }
2513 let trigger = Event::new().map_err(Error::CreateEvent)?;
2514 linux
2515 .irq_chip
2516 .register_irq_event(*irq, &trigger, None)
2517 .unwrap();
2518 let direct_irq = devices::DirectIrq::new(trigger, None).map_err(Error::DirectIrq)?;
2519 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2520 irqs.push(direct_irq);
2521 }
2522
Nicholas Verneb57c1242021-07-05 19:11:39 +10002523 let gralloc = RutabagaGralloc::new().map_err(Error::CreateGrallocError)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002524 run_control(
2525 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07002526 sys_allocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002527 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002528 control_tubes,
2529 balloon_host_tube,
2530 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002531 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002532 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002533 exit_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002534 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002535 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002536 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08002537 gralloc,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002538 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002539}
2540
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002541#[allow(dead_code)]
2542fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2543 linux: &mut RunnableLinuxVm<V, Vcpu>,
2544 sys_allocator: &mut SystemAllocator,
2545 cfg: &Config,
2546 control_tubes: &mut Vec<TaggedControlTube>,
2547 vfio_path: &Path,
2548) -> Result<()> {
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002549 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
2550 let (vfio_pci_device, jail) = create_vfio_device(
2551 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002552 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002553 sys_allocator,
2554 control_tubes,
2555 vfio_path,
Xiong Zhange19ab752021-05-20 18:18:46 +08002556 true,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002557 &mut endpoints,
2558 false,
2559 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002560
2561 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
2562 .map_err(Error::ConfigureHotPlugDevice)?;
2563
2564 let host_os_str = vfio_path.file_name().ok_or(Error::InvalidVfioPath)?;
2565 let host_str = host_os_str.to_str().ok_or(Error::InvalidVfioPath)?;
2566 let host_addr = PciAddress::from_string(host_str);
2567 let host_key = HostHotPlugKey::Vfio { host_addr };
2568 if let Some(hp_bus) = &linux.hotplug_bus {
2569 let mut hp_bus = hp_bus.lock();
2570 hp_bus.add_hotplug_device(host_key, pci_address);
2571 hp_bus.hot_plug(pci_address);
2572 return Ok(());
2573 }
2574
2575 Err(Error::NoHotPlugBus)
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002576}
2577
2578#[allow(dead_code)]
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08002579fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
2580 linux: &RunnableLinuxVm<V, Vcpu>,
2581 vfio_path: &Path,
2582) -> Result<()> {
2583 let host_os_str = vfio_path.file_name().ok_or(Error::InvalidVfioPath)?;
2584 let host_str = host_os_str.to_str().ok_or(Error::InvalidVfioPath)?;
2585 let host_addr = PciAddress::from_string(host_str);
2586 let host_key = HostHotPlugKey::Vfio { host_addr };
2587 if let Some(hp_bus) = &linux.hotplug_bus {
2588 let mut hp_bus = hp_bus.lock();
2589 let pci_addr = hp_bus
2590 .get_hotplug_device(host_key)
2591 .ok_or(Error::InvalidHotPlugKey)?;
2592 hp_bus.hot_unplug(pci_addr);
2593 return Ok(());
2594 }
2595
2596 Err(Error::NoHotPlugBus)
2597}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08002598
Daniel Verkamp29409802021-02-24 14:46:19 -08002599/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
2600/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07002601/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2602/// loop.
2603fn kick_all_vcpus(
2604 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07002605 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08002606 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07002607) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002608 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08002609 if let Err(e) = tube.send(message.clone()) {
2610 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07002611 }
2612 let _ = handle.kill(SIGRTMIN() + 0);
2613 }
2614 irq_chip.kick_halted_vcpus();
2615}
2616
Zach Reiznerdc748482021-04-14 13:59:30 -07002617fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
2618 mut linux: RunnableLinuxVm<V, Vcpu>,
2619 mut sys_allocator: SystemAllocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002620 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002621 mut control_tubes: Vec<TaggedControlTube>,
2622 balloon_host_tube: Tube,
2623 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07002624 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002625 exit_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07002626 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002627 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002628 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002629 mut gralloc: RutabagaGralloc,
Zach Reizner55a9e502018-10-03 10:22:32 -07002630) -> Result<()> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002631 #[derive(PollToken)]
2632 enum Token {
2633 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002634 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002635 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002636 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08002637 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002638 VmControl { index: usize },
2639 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002640
Zach Reizner19ad1f32019-12-12 18:58:50 -08002641 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002642 .set_raw_mode()
2643 .expect("failed to set terminal raw mode");
2644
Michael Hoylee392c462020-10-07 03:29:24 -07002645 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07002646 (&exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002647 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002648 (&sigchld_fd, Token::ChildSignal),
2649 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002650 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002651
Zach Reiznera60744b2019-02-13 17:33:32 -08002652 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002653 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002654 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002655 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002656 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002657 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002658 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002659 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002660 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002661 }
2662
Steven Richmanf32d0b42020-06-20 21:45:32 -07002663 let events = linux
2664 .irq_chip
2665 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002666 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002667
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002668 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002669 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002670 .add(&evt, Token::IrqFd { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002671 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002672 }
2673
Lepton Wu20333e42019-03-14 10:48:03 -07002674 if sandbox {
2675 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2676 drop_capabilities().map_err(Error::DropCapabilities)?;
2677 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002678
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002679 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2680 // Create a channel for GDB thread.
2681 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2682 let (s, r) = mpsc::channel();
2683 (Some(s), Some(r))
2684 } else {
2685 (None, None)
2686 };
2687
Steven Richmanf32d0b42020-06-20 21:45:32 -07002688 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2689 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002690 let use_hypervisor_signals = !linux
2691 .vm
2692 .get_hypervisor()
2693 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002694 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002695
Zach Reizner304e7312020-09-29 16:00:24 -07002696 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002697 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002698 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2699 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002700 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07002701 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07002702 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2703 Some(VcpuAffinity::Global(v)) => v,
2704 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2705 None => Default::default(),
2706 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002707 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002708 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002709 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002710 linux.vm.try_clone().map_err(Error::CloneEvent)?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002711 linux.irq_chip.try_box_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002712 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002713 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002714 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002715 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002716 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002717 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002718 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002719 linux.io_bus.clone(),
2720 linux.mmio_bus.clone(),
Zach Reiznerdc748482021-04-14 13:59:30 -07002721 exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002722 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07002723 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002724 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002725 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2726 to_gdb_channel.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07002727 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07002728 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07002729 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002730
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002731 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2732 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002733 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002734 let to_vcpu_channels = vcpu_handles
2735 .iter()
2736 .map(|(_handle, channel)| channel.clone())
2737 .collect();
2738 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002739 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002740 to_vcpu_channels,
2741 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
2742 );
2743 thread::Builder::new()
2744 .name("gdb".to_owned())
2745 .spawn(move || gdb_thread(target, gdb_port_num))
2746 .map_err(Error::SpawnGdbServer)?;
2747 };
2748
Dylan Reid059a1882018-07-23 17:58:09 -07002749 vcpu_thread_barrier.wait();
2750
Charles William Dick54045012021-07-27 19:11:53 +09002751 let mut balloon_stats_id: u64 = 0;
2752
Michael Hoylee392c462020-10-07 03:29:24 -07002753 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002754 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07002755 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002756 Ok(v) => v,
2757 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002758 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002759 break;
2760 }
2761 }
2762 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002763
Steven Richmanf32d0b42020-06-20 21:45:32 -07002764 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2765 warn!("can't deliver delayed irqs: {}", e);
2766 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002767
Zach Reiznera60744b2019-02-13 17:33:32 -08002768 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07002769 for event in events.iter().filter(|e| e.is_readable) {
2770 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002771 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002772 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07002773 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002774 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002775 Token::Suspend => {
2776 info!("VM requested suspend");
2777 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07002778 kick_all_vcpus(
2779 &vcpu_handles,
2780 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002781 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07002782 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002783 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002784 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002785 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002786 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002787 let pid = siginfo.ssi_pid;
2788 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2789 Some(label) => format!("{} (pid {})", label, pid),
2790 None => format!("pid {}", pid),
2791 };
David Tolnayf5032762018-12-03 10:46:45 -08002792 error!(
2793 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002794 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002795 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002796 }
Michael Hoylee392c462020-10-07 03:29:24 -07002797 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002798 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002799 Token::IrqFd { index } => {
2800 if let Err(e) = linux.irq_chip.service_irq_event(index) {
2801 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002802 }
2803 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002804 Token::VmControlServer => {
2805 if let Some(socket_server) = &control_server_socket {
2806 match socket_server.accept() {
2807 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07002808 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002809 .add(
2810 &socket,
2811 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002812 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08002813 },
2814 )
Michael Hoylee392c462020-10-07 03:29:24 -07002815 .map_err(Error::WaitContextAdd)?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002816 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002817 }
2818 Err(e) => error!("failed to accept socket: {}", e),
2819 }
2820 }
2821 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002822 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002823 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002824 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002825 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002826 Ok(request) => {
2827 let mut run_mode_opt = None;
2828 let response = request.execute(
2829 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002830 &balloon_host_tube,
Charles William Dick54045012021-07-27 19:11:53 +09002831 &mut balloon_stats_id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002832 disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002833 #[cfg(feature = "usb")]
2834 Some(&usb_control_tube),
2835 #[cfg(not(feature = "usb"))]
2836 None,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08002837 &mut linux.bat_control,
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002838 &vcpu_handles,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002839 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002840 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002841 error!("failed to send VmResponse: {}", e);
2842 }
2843 if let Some(run_mode) = run_mode_opt {
2844 info!("control socket changed run mode to {}", run_mode);
2845 match run_mode {
2846 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07002847 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002848 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002849 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08002850 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07002851 for dev in &linux.resume_notify_devices {
2852 dev.lock().resume_imminent();
2853 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002854 }
Steven Richman11dc6712020-09-02 15:39:14 -07002855 kick_all_vcpus(
2856 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07002857 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002858 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07002859 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002860 }
2861 }
2862 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002863 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002864 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002865 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002866 vm_control_indices_to_remove.push(index);
2867 } else {
2868 error!("failed to recv VmRequest: {}", e);
2869 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002870 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002871 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002872 TaggedControlTube::VmMemory(tube) => {
2873 match tube.recv::<VmMemoryRequest>() {
2874 Ok(request) => {
2875 let response = request.execute(
2876 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002877 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002878 Arc::clone(&map_request),
2879 &mut gralloc,
2880 );
2881 if let Err(e) = tube.send(&response) {
2882 error!("failed to send VmMemoryControlResponse: {}", e);
2883 }
2884 }
2885 Err(e) => {
2886 if let TubeError::Disconnected = e {
2887 vm_control_indices_to_remove.push(index);
2888 } else {
2889 error!("failed to recv VmMemoryControlRequest: {}", e);
2890 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002891 }
2892 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002893 }
2894 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08002895 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002896 let response = {
2897 let irq_chip = &mut linux.irq_chip;
2898 request.execute(
2899 |setup| match setup {
2900 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002901 if let Some(event_index) = irq_chip
2902 .register_irq_event(irq, ev, None)?
2903 {
2904 match wait_ctx.add(
2905 ev,
2906 Token::IrqFd {
2907 index: event_index
2908 },
2909 ) {
2910 Err(e) => {
2911 warn!("failed to add IrqFd to poll context: {}", e);
2912 Err(e)
2913 },
2914 Ok(_) => {
2915 Ok(())
2916 }
2917 }
2918 } else {
2919 Ok(())
2920 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002921 }
2922 IrqSetup::Route(route) => irq_chip.route_irq(route),
2923 },
Zach Reiznerdc748482021-04-14 13:59:30 -07002924 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002925 )
2926 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002927 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08002928 error!("failed to send VmIrqResponse: {}", e);
2929 }
2930 }
2931 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002932 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002933 vm_control_indices_to_remove.push(index);
2934 } else {
2935 error!("failed to recv VmIrqRequest: {}", e);
2936 }
2937 }
2938 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002939 TaggedControlTube::VmMsync(tube) => {
2940 match tube.recv::<VmMsyncRequest>() {
2941 Ok(request) => {
2942 let response = request.execute(&mut linux.vm);
2943 if let Err(e) = tube.send(&response) {
2944 error!("failed to send VmMsyncResponse: {}", e);
2945 }
2946 }
2947 Err(e) => {
2948 if let TubeError::Disconnected = e {
2949 vm_control_indices_to_remove.push(index);
2950 } else {
2951 error!("failed to recv VmMsyncRequest: {}", e);
2952 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08002953 }
2954 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002955 }
2956 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002957 Ok(request) => {
2958 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07002959 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002960 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002961 error!("failed to send VmResponse: {}", e);
2962 }
2963 }
2964 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002965 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002966 vm_control_indices_to_remove.push(index);
2967 } else {
2968 error!("failed to recv VmResponse: {}", e);
2969 }
2970 }
2971 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002972 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002973 }
2974 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002975 }
2976 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002977
Vikram Auradkarede68c72021-07-01 14:33:54 -07002978 // It's possible more data is readable and buffered while the socket is hungup,
2979 // so don't delete the tube from the poll context until we're sure all the
2980 // data is read.
2981 // Below case covers a condition where we have received a hungup event and the tube is not
2982 // readable.
2983 // In case of readable tube, once all data is read, any attempt to read more data on hungup
2984 // tube should fail. On such failure, we get Disconnected error and index gets added to
2985 // vm_control_indices_to_remove by the time we reach here.
2986 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
2987 if let Token::VmControl { index } = event.token {
2988 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002989 }
2990 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002991
2992 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002993 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002994 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002995 vm_control_indices_to_remove.dedup();
2996 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07002997 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2998 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002999 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003000 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003001 // that has already been closed. Because the token associated with that spurious event
3002 // now belongs to a different socket, the control loop will start to interact with
3003 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3004 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003005 if let Some(socket) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003006 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08003007 }
3008
3009 // This line implicitly drops the socket at `index` when it gets returned by
3010 // `swap_remove`. After this line, the socket at `index` is not the one from
3011 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003012 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003013 control_tubes.swap_remove(index);
3014 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003015 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003016 .modify(tube, EventType::Read, Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07003017 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003018 }
3019 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003020 }
3021
Zach Reiznerdc748482021-04-14 13:59:30 -07003022 kick_all_vcpus(
3023 &vcpu_handles,
3024 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003025 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003026 );
Steven Richman11dc6712020-09-02 15:39:14 -07003027 for (handle, _) in vcpu_handles {
3028 if let Err(e) = handle.join() {
3029 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003030 }
3031 }
3032
Daniel Verkamp94c35272019-09-12 13:31:30 -07003033 // Explicitly drop the VM structure here to allow the devices to clean up before the
3034 // control sockets are closed when this function exits.
3035 mem::drop(linux);
3036
Zach Reizner19ad1f32019-12-12 18:58:50 -08003037 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003038 .set_canon_mode()
3039 .expect("failed to restore canonical mode for terminal");
3040
3041 Ok(())
3042}