blob: d39dfcd3b2f0077ccfbe25525b19f8ce10f38706 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Hikaru Nishida584e52c2021-04-27 17:37:08 +09005use std::cmp::Reverse;
Chia-I Wu7f0f7c12022-01-12 10:42:18 -08006use std::collections::{BTreeMap, HashSet};
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07008#[cfg(feature = "gpu")]
9use std::env;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090011use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -070015use std::os::unix::net::UnixListener;
Christian Blichmann50f95912021-11-05 16:59:39 +010016use std::os::unix::{io::FromRawFd, net::UnixStream, prelude::OpenOptionsExt};
Zach Reizner39aa26b2017-12-12 18:03:23 -080017use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070018use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070019use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090020use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070021
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::thread;
23use std::thread::JoinHandle;
24
Daniel Verkamp6b298582021-08-16 15:37:11 -070025use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080026
Tomasz Jeznach42644642020-05-20 23:27:59 -070027use acpi_tables::sdt::SDT;
28
Daniel Verkamp6b298582021-08-16 15:37:11 -070029use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090030use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080031use base::*;
Keiichi Watanabe553d2192021-08-16 16:42:27 +090032use devices::serial_device::{SerialHardware, SerialParameters};
Zide Chenafdb9382021-06-17 12:04:43 -070033use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow0b2b6062021-09-03 15:40:02 +090034#[cfg(feature = "audio_cras")]
35use devices::virtio::snd::cras_backend::Parameters as CrasSndParameters;
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -070036use devices::virtio::vhost::user::proxy::VirtioVhostUser;
Woody Chow1b16db12021-04-02 16:59:59 +090037#[cfg(feature = "audio")]
38use devices::virtio::vhost::user::vmm::Snd as VhostUserSnd;
Keiichi Watanabefb36e0c2021-08-13 18:48:31 +090039use devices::virtio::vhost::user::vmm::{
Richard5afeafa2021-07-26 19:02:09 -070040 Block as VhostUserBlock, Console as VhostUserConsole, Fs as VhostUserFs,
Chirantan Ekbote84091e52021-09-10 18:43:17 +090041 Mac80211Hwsim as VhostUserMac80211Hwsim, Net as VhostUserNet, Vsock as VhostUserVsock,
42 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090043};
Alexandre Courbotb42b3e52021-07-09 23:38:57 +090044#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
45use devices::virtio::VideoBackendType;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070046use devices::virtio::{self, Console, VirtioDevice};
Chirantan Ekbote44292f52021-06-25 18:31:41 +090047#[cfg(feature = "gpu")]
48use devices::virtio::{
Chia-I Wu16fb6592021-11-10 11:45:32 -080049 gpu::{GpuRenderServerParameters, DEFAULT_DISPLAY_HEIGHT, DEFAULT_DISPLAY_WIDTH},
Chirantan Ekbote44292f52021-06-25 18:31:41 +090050 vhost::user::vmm::Gpu as VhostUserGpu,
51 EventDevice,
52};
paulhsiace17e6e2020-08-28 18:37:45 +080053#[cfg(feature = "audio")]
54use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080055use devices::{
Xiong Zhangf82f2dc2021-05-21 16:54:12 +080056 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqChip, IrqEventIndex, KvmKernelIrqChip,
57 PciAddress, PciBridge, PciDevice, PcieRootPort, StubPciDevice, VcpuRunState, VfioContainer,
58 VfioDevice, VfioPciDevice, VfioPlatformDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080059};
Daniel Verkampf1439d42021-05-21 13:55:10 -070060#[cfg(feature = "usb")]
61use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070062use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Andrew Walbran00f1c9f2021-12-10 17:13:08 +000063use hypervisor::{HypervisorCap, ProtectionType, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070064use minijail::{self, Minijail};
Richard5afeafa2021-07-26 19:02:09 -070065use net_util::{MacAddress, Tap};
Xiong Zhang87a3b442019-10-29 17:32:44 +080066use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080067use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070068use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080069use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090070use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080071
Keiichi Watanabec5262e92020-10-21 15:57:33 +090072#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
73use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090074use crate::{
Tomasz Nowicki71aca792021-06-09 18:53:49 +000075 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VfioType,
Christian Blichmann50f95912021-11-05 16:59:39 +010076 VhostUserFsOption, VhostUserOption, VhostUserWlOption, VhostVsockDeviceParameter,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090077};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070078use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090079 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070080};
Sonny Raoed517d12018-02-13 22:09:43 -080081
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080082#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070083use {
84 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070085 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070086 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
87};
Zach Reizner55a9e502018-10-03 10:22:32 -070088#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070089use {
Steven Richman11dc6712020-09-02 15:39:14 -070090 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
91 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070092 x86_64::X8664arch as Arch,
93};
Zach Reizner39aa26b2017-12-12 18:03:23 -080094
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080095enum TaggedControlTube {
96 Fs(Tube),
97 Vm(Tube),
98 VmMemory(Tube),
99 VmIrq(Tube),
100 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700101}
102
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800103impl AsRef<Tube> for TaggedControlTube {
104 fn as_ref(&self) -> &Tube {
105 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700106 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800107 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700108 }
109 }
110}
111
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800112impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700113 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800114 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700115 }
116}
117
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When set, the jail is configured with an empty capability mask.
    limit_caps: bool,
    /// When set, seccomp filter failures are logged and the `.policy` file is
    /// used even if a precompiled `.bpf` file exists.
    log_failures: bool,
    /// Path of the seccomp policy; its extension is replaced with `bpf` or
    /// `policy` when the policy is loaded.
    seccomp_policy: &'a Path,
    /// Optional UID map passed to the jail.
    uid_map: Option<&'a str>,
    /// Optional GID map passed to the jail.
    gid_map: Option<&'a str>,
}
125
Zach Reizner44863792019-06-26 14:22:08 -0700126fn create_base_minijail(
127 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800128 r_limit: Option<u64>,
129 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700130) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800131 // All child jails run in a new user namespace without any users mapped,
132 // they run as nobody unless otherwise configured.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700133 let mut j = Minijail::new().context("failed to jail device")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800134
135 if let Some(config) = config {
136 j.namespace_pids();
137 j.namespace_user();
138 j.namespace_user_disable_setgroups();
139 if config.limit_caps {
140 // Don't need any capabilities.
141 j.use_caps(0);
142 }
143 if let Some(uid_map) = config.uid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700144 j.uidmap(uid_map).context("error setting UID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800145 }
146 if let Some(gid_map) = config.gid_map {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700147 j.gidmap(gid_map).context("error setting GID map")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800148 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900149 // Run in a new mount namespace.
150 j.namespace_vfs();
151
Matt Delcoc24ad782020-02-14 13:24:36 -0800152 // Run in an empty network namespace.
153 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900154
155 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800156 j.no_new_privs();
157
158 // By default we'll prioritize using the pre-compiled .bpf over the .policy
159 // file (the .bpf is expected to be compiled using "trap" as the failure
160 // behavior instead of the default "kill" behavior).
161 // Refer to the code comment for the "seccomp-log-failures"
162 // command-line parameter for an explanation about why the |log_failures|
163 // flag forces the use of .policy files (and the build-time alternative to
164 // this run-time flag).
165 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
166 if bpf_policy_file.exists() && !config.log_failures {
167 j.parse_seccomp_program(&bpf_policy_file)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700168 .context("failed to parse precompiled seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800169 } else {
170 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
171 // which will correctly kill the entire device process if a worker
172 // thread commits a seccomp violation.
173 j.set_seccomp_filter_tsync();
174 if config.log_failures {
175 j.log_seccomp_filter_failures();
176 }
177 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700178 .context("failed to parse seccomp policy")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800179 }
180 j.use_seccomp_filter();
181 // Don't do init setup.
182 j.run_as_init();
183 }
184
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900185 // Only pivot_root if we are not re-using the current root directory.
186 if root != Path::new("/") {
187 // It's safe to call `namespace_vfs` multiple times.
188 j.namespace_vfs();
Daniel Verkamp6b298582021-08-16 15:37:11 -0700189 j.enter_pivot_root(root)
190 .context("failed to pivot root device")?;
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900191 }
Matt Delco45caf912019-11-13 08:11:09 -0800192
Matt Delcoc24ad782020-02-14 13:24:36 -0800193 // Most devices don't need to open many fds.
194 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
195 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700196 .context("error setting max open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800197
Zach Reizner39aa26b2017-12-12 18:03:23 -0800198 Ok(j)
199}
200
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800201fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700202 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800203 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
204 // A directory for a jailed device's pivot root.
205 let root_path = Path::new(pivot_root);
206 if !root_path.exists() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700207 bail!("{} doesn't exist, can't jail devices", pivot_root);
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800208 }
209 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800210 let config = SandboxConfig {
211 limit_caps: true,
212 log_failures: cfg.seccomp_log_failures,
213 seccomp_policy: &policy_path,
214 uid_map: None,
215 gid_map: None,
216 };
217 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800218 } else {
219 Ok(None)
220 }
221}
222
Daniel Verkamp6b298582021-08-16 15:37:11 -0700223type DeviceResult<T = VirtioDeviceStub> = Result<T>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800224
Andrew Walbran4cad30a2021-06-28 15:58:08 +0000225fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900226 let raw_image: File = open_file(&disk.path, disk.read_only, disk.o_direct)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700227 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800228 // Lock the disk image to prevent other crosvm instances from using it.
229 let lock_op = if disk.read_only {
230 FlockOperation::LockShared
231 } else {
232 FlockOperation::LockExclusive
233 };
Daniel Verkamp6b298582021-08-16 15:37:11 -0700234 flock(&raw_image, lock_op, true).context("failed to lock disk image")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800235
Junichi Uekawa52437db2021-09-29 17:33:07 +0900236 info!("Trying to attach block device: {}", disk.path.display());
Daniel Verkamp6b298582021-08-16 15:37:11 -0700237 let dev = if disk::async_ok(&raw_image).context("failed to check disk async_ok")? {
238 let async_file = disk::create_async_disk_file(raw_image)
239 .context("failed to create async virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700240 Box::new(
241 virtio::BlockAsync::new(
242 virtio::base_features(cfg.protected_vm),
243 async_file,
244 disk.read_only,
245 disk.sparse,
246 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700247 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800248 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700249 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700250 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700251 ) as Box<dyn VirtioDevice>
252 } else {
Daniel Verkampeb1640e2021-09-07 14:09:31 -0700253 let disk_file = disk::create_disk_file(raw_image, disk::MAX_NESTING_DEPTH)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700254 .context("failed to create virtual disk")?;
Dylan Reid503c5ab2020-07-17 11:20:07 -0700255 Box::new(
256 virtio::Block::new(
257 virtio::base_features(cfg.protected_vm),
258 disk_file,
259 disk.read_only,
260 disk.sparse,
261 disk.block_size,
262 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800263 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700264 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700265 .context("failed to create block device")?,
Dylan Reid503c5ab2020-07-17 11:20:07 -0700266 ) as Box<dyn VirtioDevice>
267 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800268
269 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700270 dev,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700271 jail: simple_jail(cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800272 })
273}
274
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900275fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
276 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700277 .context("failed to set up vhost-user block device")?;
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900278
279 Ok(VirtioDeviceStub {
280 dev: Box::new(dev),
281 // no sandbox here because virtqueue handling is exported to a different process.
282 jail: None,
283 })
284}
285
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900286fn create_vhost_user_console_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
287 let dev = VhostUserConsole::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700288 .context("failed to set up vhost-user console device")?;
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900289
290 Ok(VirtioDeviceStub {
291 dev: Box::new(dev),
292 // no sandbox here because virtqueue handling is exported to a different process.
293 jail: None,
294 })
295}
296
Woody Chow5890b702021-02-12 14:57:02 +0900297fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
298 let dev = VhostUserFs::new(
299 virtio::base_features(cfg.protected_vm),
300 &option.socket,
301 &option.tag,
302 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700303 .context("failed to set up vhost-user fs device")?;
Woody Chow5890b702021-02-12 14:57:02 +0900304
305 Ok(VirtioDeviceStub {
306 dev: Box::new(dev),
307 // no sandbox here because virtqueue handling is exported to a different process.
308 jail: None,
309 })
310}
311
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900312fn create_vhost_user_mac80211_hwsim_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
313 let dev = VhostUserMac80211Hwsim::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700314 .context("failed to set up vhost-user mac80211_hwsim device")?;
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900315
316 Ok(VirtioDeviceStub {
317 dev: Box::new(dev),
318 // no sandbox here because virtqueue handling is exported to a different process.
319 jail: None,
320 })
321}
322
/// Creates a vhost-user sound device that connects to `option.socket`.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(cfg: &Config, option: &VhostUserOption) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let snd = VhostUserSnd::new(features, &option.socket)
        .context("failed to set up vhost-user snd device")?;

    let dev: Box<dyn VirtioDevice> = Box::new(snd);
    // no sandbox here because virtqueue handling is exported to a different process.
    Ok(VirtioDeviceStub { dev, jail: None })
}
334
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700335fn create_vvu_proxy_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
336 let listener = UnixListener::bind(&opt.socket).map_err(|e| {
337 error!("failed to bind listener for vvu proxy device: {}", e);
338 e
339 })?;
340
341 let dev = VirtioVhostUser::new(virtio::base_features(cfg.protected_vm), listener)
342 .context("failed to create VVU proxy device")?;
343
344 Ok(VirtioDeviceStub {
345 dev: Box::new(dev),
346 jail: simple_jail(cfg, "vvu_proxy_device")?,
347 })
348}
349
David Tolnay2b089fc2019-03-04 15:33:22 -0800350fn create_rng_device(cfg: &Config) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700351 let dev = virtio::Rng::new(virtio::base_features(cfg.protected_vm))
352 .context("failed to set up rng")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800353
354 Ok(VirtioDeviceStub {
355 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700356 jail: simple_jail(cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800357 })
358}
359
Woody Chow737ff122021-03-22 17:49:57 +0900360#[cfg(feature = "audio_cras")]
Woody Chow0b2b6062021-09-03 15:40:02 +0900361fn create_cras_snd_device(cfg: &Config, cras_snd: CrasSndParameters) -> DeviceResult {
362 let dev = virtio::snd::cras_backend::VirtioSndCras::new(
363 virtio::base_features(cfg.protected_vm),
364 cras_snd,
365 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700366 .context("failed to create cras sound device")?;
Woody Chow737ff122021-03-22 17:49:57 +0900367
368 let jail = match simple_jail(&cfg, "cras_snd_device")? {
369 Some(mut jail) => {
370 // Create a tmpfs in the device's root directory for cras_snd_device.
371 // The size is 20*1024, or 20 KB.
372 jail.mount_with_data(
373 Path::new("none"),
374 Path::new("/"),
375 "tmpfs",
376 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
377 "size=20480",
378 )?;
379
380 let run_cras_path = Path::new("/run/cras");
381 jail.mount_bind(run_cras_path, run_cras_path, true)?;
382
383 add_current_user_to_jail(&mut jail)?;
384
385 Some(jail)
386 }
387 None => None,
388 };
389
390 Ok(VirtioDeviceStub {
391 dev: Box::new(dev),
392 jail,
393 })
394}
395
/// Creates the virtio TPM device and picks its storage directory.
///
/// When sandboxing is enabled the jail gets a small tmpfs root, and a
/// per-process directory `/run/vm/tpm.<pid>` is created, chowned to the ids
/// returned by `add_current_user_to_jail`, and bind-mounted into the jail.
/// Without a jail, `/tmp/tpm-simulator` is used instead.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match &mut tpm_jail {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )?;

            let crosvm_ids = add_current_user_to_jail(jail)?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage).with_context(|| {
                format!("failed to create tpm storage dir {}", storage.display())
            })?;
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .context("failed to chown tpm storage")?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
444
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700445fn create_single_touch_device(
446 cfg: &Config,
447 single_touch_spec: &TouchDeviceOption,
448 idx: u32,
449) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800450 let socket = single_touch_spec
451 .get_path()
452 .into_unix_stream()
453 .map_err(|e| {
454 error!("failed configuring virtio single touch: {:?}", e);
455 e
456 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800457
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800458 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700459 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700460 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700461 socket,
462 width,
463 height,
464 virtio::base_features(cfg.protected_vm),
465 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700466 .context("failed to set up input device")?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800467 Ok(VirtioDeviceStub {
468 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700469 jail: simple_jail(cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800470 })
471}
472
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700473fn create_multi_touch_device(
474 cfg: &Config,
475 multi_touch_spec: &TouchDeviceOption,
476 idx: u32,
477) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000478 let socket = multi_touch_spec
479 .get_path()
480 .into_unix_stream()
481 .map_err(|e| {
482 error!("failed configuring virtio multi touch: {:?}", e);
483 e
484 })?;
485
486 let (width, height) = multi_touch_spec.get_size();
487 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700488 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000489 socket,
490 width,
491 height,
492 virtio::base_features(cfg.protected_vm),
493 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700494 .context("failed to set up input device")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000495
496 Ok(VirtioDeviceStub {
497 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700498 jail: simple_jail(cfg, "input_device")?,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000499 })
500}
501
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700502fn create_trackpad_device(
503 cfg: &Config,
504 trackpad_spec: &TouchDeviceOption,
505 idx: u32,
506) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800507 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000508 error!("failed configuring virtio trackpad: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800509 e
510 })?;
511
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800512 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700513 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700514 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700515 socket,
516 width,
517 height,
518 virtio::base_features(cfg.protected_vm),
519 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700520 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800521
522 Ok(VirtioDeviceStub {
523 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700524 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800525 })
526}
527
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700528fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800529 let socket = mouse_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000530 error!("failed configuring virtio mouse: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800531 e
532 })?;
533
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700534 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700535 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800536
537 Ok(VirtioDeviceStub {
538 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700539 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800540 })
541}
542
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700543fn create_keyboard_device<T: IntoUnixStream>(
544 cfg: &Config,
545 keyboard_socket: T,
546 idx: u32,
547) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800548 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000549 error!("failed configuring virtio keyboard: {:#}", e);
David Tolnay2b089fc2019-03-04 15:33:22 -0800550 e
551 })?;
552
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700553 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700554 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800555
556 Ok(VirtioDeviceStub {
557 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700558 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800559 })
560}
561
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700562fn create_switches_device<T: IntoUnixStream>(
563 cfg: &Config,
564 switches_socket: T,
565 idx: u32,
566) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800567 let socket = switches_socket.into_unix_stream().map_err(|e| {
Maciek Swiechc3011222021-11-24 21:01:04 +0000568 error!("failed configuring virtio switches: {:#}", e);
Daniel Norman5e23df72021-03-11 10:11:02 -0800569 e
570 })?;
571
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700572 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700573 .context("failed to set up input device")?;
Daniel Norman5e23df72021-03-11 10:11:02 -0800574
575 Ok(VirtioDeviceStub {
576 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700577 jail: simple_jail(cfg, "input_device")?,
Daniel Norman5e23df72021-03-11 10:11:02 -0800578 })
579}
580
David Tolnay2b089fc2019-03-04 15:33:22 -0800581fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
582 let dev_file = OpenOptions::new()
583 .read(true)
584 .write(true)
585 .open(dev_path)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700586 .with_context(|| format!("failed to open vinput device {}", dev_path.display()))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800587
Noah Goldd4ca29b2020-10-27 12:21:52 -0700588 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700589 .context("failed to set up input device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800590
591 Ok(VirtioDeviceStub {
592 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700593 jail: simple_jail(cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800594 })
595}
596
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800597fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
598 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700599 .context("failed to create balloon")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800600
601 Ok(VirtioDeviceStub {
602 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700603 jail: simple_jail(cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800604 })
605}
606
Alexandre Courbot911773a2021-12-10 14:31:10 +0900607/// Generic method for creating a network device. `create_device` is a closure that takes the virtio
608/// features and number of queue pairs as parameters, and is responsible for creating the device
609/// itself.
610fn create_net_device<F, T>(cfg: &Config, policy: &str, create_device: F) -> DeviceResult
611where
612 F: Fn(u64, u16) -> Result<T>,
613 T: VirtioDevice + 'static,
614{
Xiong Zhang773c7072020-03-20 10:39:55 +0800615 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
616 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700617 if vcpu_count < vq_pairs as usize {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900618 warn!("the number of net vq pairs must not exceed the vcpu count, falling back to single queue mode");
Xiong Zhang773c7072020-03-20 10:39:55 +0800619 vq_pairs = 1;
620 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100621 let features = virtio::base_features(cfg.protected_vm);
Alexandre Courbot911773a2021-12-10 14:31:10 +0900622
623 let dev = create_device(features, vq_pairs)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800624
625 Ok(VirtioDeviceStub {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900626 dev: Box::new(dev) as Box<dyn VirtioDevice>,
627 jail: simple_jail(cfg, policy)?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800628 })
629}
630
Alexandre Courbot911773a2021-12-10 14:31:10 +0900631/// Returns a network device created from a new TAP interface configured with `host_ip`, `netmask`,
632/// and `mac_address`.
633fn create_net_device_from_config(
David Tolnay2b089fc2019-03-04 15:33:22 -0800634 cfg: &Config,
635 host_ip: Ipv4Addr,
636 netmask: Ipv4Addr,
637 mac_address: MacAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -0800638) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800639 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800640 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800641 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800642 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800643 };
644
Alexandre Courbot911773a2021-12-10 14:31:10 +0900645 if cfg.vhost_net {
646 create_net_device(cfg, policy, |features, _vq_pairs| {
647 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
648 &cfg.vhost_net_device_path,
649 features,
650 host_ip,
651 netmask,
652 mac_address,
653 )
654 .context("failed to set up vhost networking")
655 })
656 } else {
657 create_net_device(cfg, policy, |features, vq_pairs| {
658 virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
659 .context("failed to create virtio network device")
660 })
661 }
662}
663
664/// Returns a network device from a file descriptor to a configured TAP interface.
665fn create_tap_net_device_from_fd(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
666 create_net_device(cfg, "net_device", |features, vq_pairs| {
667 // Safe because we ensure that we get a unique handle to the fd.
668 let tap = unsafe {
669 Tap::from_raw_descriptor(
670 validate_raw_descriptor(tap_fd).context("failed to validate tap descriptor")?,
671 )
672 .context("failed to create tap device")?
673 };
674
675 virtio::Net::from(features, tap, vq_pairs).context("failed to create tap net device")
David Tolnay2b089fc2019-03-04 15:33:22 -0800676 })
677}
678
Alexandre Courbot993aa7f2021-12-09 14:51:29 +0900679/// Returns a network device created by opening the persistent, configured TAP interface `tap_name`.
680fn create_tap_net_device_from_name(cfg: &Config, tap_name: &[u8]) -> DeviceResult {
681 create_net_device(cfg, "net_device", |features, vq_pairs| {
682 virtio::Net::<Tap>::new_from_name(features, tap_name, vq_pairs)
683 .context("failed to create configured virtio network device")
684 })
685}
686
Keiichi Watanabe60686582021-03-12 04:53:51 +0900687fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
688 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700689 .context("failed to set up vhost-user net device")?;
Keiichi Watanabe60686582021-03-12 04:53:51 +0900690
691 Ok(VirtioDeviceStub {
692 dev: Box::new(dev),
693 // no sandbox here because virtqueue handling is exported to a different process.
694 jail: None,
695 })
696}
697
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900698fn create_vhost_user_vsock_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
699 let dev = VhostUserVsock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700700 .context("failed to set up vhost-user vsock device")?;
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900701
702 Ok(VirtioDeviceStub {
703 dev: Box::new(dev),
704 // no sandbox here because virtqueue handling is exported to a different process.
705 jail: None,
706 })
707}
708
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900709fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
710 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
711 // connection.
712 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700713 .context("failed to set up vhost-user wl device")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900714
715 Ok(VirtioDeviceStub {
716 dev: Box::new(dev),
717 // no sandbox here because virtqueue handling is exported to a different process.
718 jail: None,
719 })
720}
721
/// Creates a vhost-user gpu device that connects to the socket given in `opt`.
///
/// `host_tube` and `device_tube` are handed over to the device. The returned stub has no jail.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(
    cfg: &Config,
    opt: &VhostUserOption,
    host_tube: Tube,
    device_tube: Tube,
) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);

    // The crosvm gpu device expects us to connect the tube before it will accept a vhost-user
    // connection.
    let gpu = VhostUserGpu::new(features, &opt.socket, host_tube, device_tube)
        .context("failed to set up vhost-user gpu device")?;

    // no sandbox here because virtqueue handling is exported to a different process.
    let jail = None;

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail,
    })
}
745
/// Bind-mounts each directory of `dirs` that exists into `jail` at the same path, read-only.
///
/// Missing directories are skipped silently; only a failing mount of an existing directory is
/// reported as an error.
#[cfg(any(feature = "gpu", feature = "video-decoder", feature = "video-encoder"))]
fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
    jail: &mut Minijail,
    dirs: &[P],
) -> Result<()> {
    for existing in dirs.iter().map(Path::new).filter(|path| path.exists()) {
        jail.mount_bind(existing, existing, false)?;
    }

    Ok(())
}
763
/// Builds the jail shared by GPU-related processes, using the jail policy named `policy`.
///
/// Returns `Ok(None)` when `simple_jail` yields no jail. Otherwise the jail gets a tmpfs root
/// plus the mounts needed for graphics: DRM-related sysfs and device nodes, the vendor device
/// nodes that exist on the host, the library and data directories used by dynamically loaded mesa
/// drivers, a read-only `/proc`, and the perfetto IPC directory if it exists.
#[cfg(feature = "gpu")]
fn gpu_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
    let mut jail = match simple_jail(cfg, policy)? {
        Some(jail) => jail,
        None => return Ok(None),
    };

    // Create a tmpfs in the device's root directory so that we can bind mount the
    // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
    jail.mount_with_data(
        Path::new("none"),
        Path::new("/"),
        "tmpfs",
        (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
        "size=67108864",
    )?;

    // Device nodes required for DRM.
    for required in &["/sys/dev/char", "/sys/devices"] {
        let required = Path::new(required);
        jail.mount_bind(required, required, false)?;
    }

    let drm_dri_path = Path::new("/dev/dri");
    if drm_dri_path.exists() {
        jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
    }

    // Optional device nodes, bind mounted writable when they exist on the host:
    // - /dev/mali0 and /dev/pvr_sync are the ARM specific devices,
    // - /dev/udmabuf belongs to the udmabuf driver.
    for optional in &["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
        let optional = Path::new(optional);
        if optional.exists() {
            jail.mount_bind(optional, optional, true)?;
        }
    }

    // Libraries that are required when mesa drivers are dynamically loaded.
    jail_mount_bind_if_exists(
        &mut jail,
        &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/drirc.d",
            "/usr/share/glvnd",
            "/usr/share/vulkan",
        ],
    )?;

    // pvr driver requires read access to /proc/self/task/*/comm.
    let proc_path = Path::new("/proc");
    jail.mount(
        proc_path,
        proc_path,
        "proc",
        (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
    )?;

    // To enable perfetto tracing, we need to give access to the perfetto service IPC
    // endpoints.
    let perfetto_path = Path::new("/run/perfetto");
    if perfetto_path.exists() {
        jail.mount_bind(perfetto_path, perfetto_path, true)?;
    }

    Ok(Some(jail))
}
841
/// Shader cache settings computed by `get_gpu_cache_info`.
#[cfg(feature = "gpu")]
struct GpuCacheInfo<'a> {
    /// Cache directory that should be made available to the GPU process, if caching is enabled.
    directory: Option<&'a str>,
    /// `(name, value)` pairs of environment variables that configure the shader cache.
    environment: Vec<(&'a str, &'a str)>,
}
847
/// Works out the shader cache directory and the matching `MESA_GLSL_CACHE_*` settings.
///
/// * No `cache_dir`: nothing is configured (no directory, empty environment).
/// * `cache_dir` does not exist, or the target is ARM/AArch64 and `sandbox` is enabled: a
///   warning is logged and caching is disabled through `MESA_GLSL_CACHE_DISABLE=true`.
/// * Otherwise caching is enabled for `cache_dir`, and `cache_size` (if given) is passed along
///   as `MESA_GLSL_CACHE_MAX_SIZE`.
#[cfg(feature = "gpu")]
fn get_gpu_cache_info<'a>(
    cache_dir: Option<&'a String>,
    cache_size: Option<&'a String>,
    sandbox: bool,
) -> GpuCacheInfo<'a> {
    let mut info = GpuCacheInfo {
        directory: None,
        environment: Vec::new(),
    };

    let cache_dir = match cache_dir {
        Some(path) => path,
        None => return info,
    };

    if !Path::new(cache_dir).exists() {
        warn!("shader caching dir {} does not exist", cache_dir);
        info.environment.push(("MESA_GLSL_CACHE_DISABLE", "true"));
        return info;
    }

    if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && sandbox {
        warn!("shader caching not yet supported on ARM with sandbox enabled");
        info.environment.push(("MESA_GLSL_CACHE_DISABLE", "true"));
        return info;
    }

    info.directory = Some(cache_dir.as_str());
    info.environment.push(("MESA_GLSL_CACHE_DISABLE", "false"));
    info.environment
        .push(("MESA_GLSL_CACHE_DIR", cache_dir.as_str()));
    if let Some(max_size) = cache_size {
        info.environment
            .push(("MESA_GLSL_CACHE_MAX_SIZE", max_size.as_str()));
    }

    info
}
880
/// Creates the virtio GPU device together with its jail (if `gpu_jail` provides one).
///
/// The display backend list is X (using `x_display`) followed by the stub backend; when
/// `wayland_socket_path` is given, a Wayland backend for that socket is inserted at the front.
///
/// When jailed, the shader cache directory (if enabled) and the parent directory of every
/// configured wayland socket are bind mounted into the jail, and the shader cache settings are
/// exported through this process's environment.
///
/// # Errors
///
/// Returns an error if `cfg.gpu_parameters` is not set, if a wayland socket path has no parent
/// directory, if `exit_evt` cannot be cloned, or if setting up the jail fails.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    render_server_fd: Option<SafeDescriptor>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    // Resolve the GPU parameters once and report a missing configuration as an error instead of
    // panicking.
    let gpu_parameters = cfg
        .gpu_parameters
        .as_ref()
        .ok_or_else(|| anyhow!("missing gpu parameters"))?;

    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, path)| path.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;

    if let Some(socket_path) = wayland_socket_path {
        display_backends.insert(
            0,
            virtio::DisplayBackend::Wayland(Some(socket_path.to_owned())),
        );
    }

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().context("failed to clone event")?,
        Some(gpu_device_tube),
        resource_bridges,
        display_backends,
        gpu_parameters,
        render_server_fd,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
    );

    let jail = match gpu_jail(cfg, "gpu_device")? {
        Some(mut jail) => {
            // Prepare GPU shader disk cache directory.
            let cache_info = get_gpu_cache_info(
                gpu_parameters.cache_path.as_ref(),
                gpu_parameters.cache_size.as_ref(),
                cfg.sandbox,
            );

            if let Some(dir) = cache_info.directory {
                jail.mount_bind(dir, dir, true)?;
            }
            // Export the cache settings through this process's environment.
            for (key, val) in cache_info.environment {
                env::set_var(key, val);
            }

            // Bind mount the wayland socket's directory into jail's root. This is necessary since
            // each new wayland context must open() the socket. If the wayland socket is ever
            // destroyed and remade in the same host directory, new connections will be possible
            // without restarting the wayland device.
            for dir in &wayland_socket_dirs {
                jail.mount_bind(dir, dir, true)?;
            }

            add_current_user_to_jail(&mut jail)?;

            Some(jail)
        }
        None => None,
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
963
/// Builds the `KEY=VALUE` environment list for the GPU render server.
///
/// The shader cache settings from `cache_info` come first, followed by every variable of the
/// current process environment whose name is not already set by `cache_info`.
///
/// Returns an error if any variable of the current environment has a name or value that is not
/// valid Unicode.
#[cfg(feature = "gpu")]
fn get_gpu_render_server_environment(cache_info: &GpuCacheInfo) -> Result<Vec<String>> {
    // Names owned by the cache settings; these take precedence over inherited values.
    let overridden: HashSet<&str> = cache_info
        .environment
        .iter()
        .map(|(name, _)| *name)
        .collect();

    let mut entries: Vec<String> = cache_info
        .environment
        .iter()
        .map(|(name, value)| format!("{}={}", name, value))
        .collect();

    for (name_os, value_os) in env::vars_os() {
        // minijail should accept OsStr rather than str...
        let name = name_os
            .into_string()
            .map_err(|_| anyhow!("invalid environment key/val"))?;
        let value = value_os
            .into_string()
            .map_err(|_| anyhow!("invalid environment key/val"))?;

        if overridden.contains(name.as_str()) {
            continue;
        }
        entries.push(format!("{}={}", name, value));
    }

    Ok(entries)
}
987
/// Starts the GPU render server executable at `render_server_parameters.path`.
///
/// A seqpacket socket pair is created; the server end is passed to the child via the
/// `--socket-fd` argument and the client end is returned. The child runs in the jail returned by
/// `gpu_jail` with the "gpu_render_server" policy, or in a fresh `Minijail` when there is none,
/// and inherits the server socket, stdout and stderr.
///
/// When jailed and shader cache settings apply, the child gets an explicit environment built by
/// `get_gpu_render_server_environment`; otherwise no environment is passed to the command.
#[cfg(feature = "gpu")]
fn start_gpu_render_server(
    cfg: &Config,
    render_server_parameters: &GpuRenderServerParameters,
) -> Result<SafeDescriptor> {
    let (server_socket, client_socket) =
        UnixSeqpacket::pair().context("failed to create render server socket")?;

    let (jail, env) = match gpu_jail(cfg, "gpu_render_server")? {
        Some(mut jail) => {
            let cache_info = get_gpu_cache_info(
                render_server_parameters.cache_path.as_ref(),
                render_server_parameters.cache_size.as_ref(),
                cfg.sandbox,
            );

            if let Some(dir) = cache_info.directory {
                jail.mount_bind(dir, dir, true)?;
            }

            let env = if cache_info.environment.is_empty() {
                None
            } else {
                Some(get_gpu_render_server_environment(&cache_info)?)
            };

            // bind mount /dev/log for syslog
            let log_path = Path::new("/dev/log");
            if log_path.exists() {
                jail.mount_bind(log_path, log_path, true)?;
            }

            // Run as root in the jail to keep capabilities after execve, which is needed for
            // mounting to work. All capabilities will be dropped afterwards.
            add_current_user_as_root_to_jail(&mut jail)?;

            (jail, env)
        }
        None => (Minijail::new().context("failed to create jail")?, None),
    };

    let socket_fd = server_socket.as_raw_descriptor();
    let inheritable_fds = [socket_fd, libc::STDOUT_FILENO, libc::STDERR_FILENO];

    let cmd = &render_server_parameters.path;
    let cmd_str = cmd
        .to_str()
        .ok_or_else(|| anyhow!("invalid render server path"))?;
    let fd_str = socket_fd.to_string();
    let args = [cmd_str, "--socket-fd", fd_str.as_str()];

    let envp: Option<Vec<&str>> = env
        .as_ref()
        .map(|vars| vars.iter().map(String::as_str).collect());

    let command =
        minijail::Command::new_for_path(cmd, &inheritable_fds, &args, envp.as_deref())?;
    jail.run_command(command)
        .context("failed to start gpu render server")?;

    Ok(SafeDescriptor::from(client_socket))
}
1056
David Tolnay2b089fc2019-03-04 15:33:22 -08001057fn create_wayland_device(
1058 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001059 control_tube: Tube,
1060 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001061) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001062 let wayland_socket_dirs = cfg
1063 .wayland_socket_paths
1064 .iter()
1065 .map(|(_name, path)| path.parent())
1066 .collect::<Option<Vec<_>>>()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001067 .ok_or_else(|| anyhow!("wayland socket path has no parent or file name"))?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001068
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001069 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001070 let dev = virtio::Wl::new(
1071 features,
1072 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001073 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +01001074 resource_bridge,
1075 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001076 .context("failed to create wayland device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001077
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001078 let jail = match simple_jail(cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -08001079 Some(mut jail) => {
1080 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
1081 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
1082 jail.mount_with_data(
1083 Path::new("none"),
1084 Path::new("/"),
1085 "tmpfs",
1086 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1087 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -08001088 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001089
1090 // Bind mount the wayland socket's directory into jail's root. This is necessary since
1091 // each new wayland context must open() the socket. If the wayland socket is ever
1092 // destroyed and remade in the same host directory, new connections will be possible
1093 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001094 for dir in &wayland_socket_dirs {
1095 jail.mount_bind(dir, dir, true)?;
1096 }
Fergus Dall51200512021-08-19 12:54:26 +10001097 add_current_user_to_jail(&mut jail)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001098
1099 Some(jail)
1100 }
1101 None => None,
1102 };
1103
1104 Ok(VirtioDeviceStub {
1105 dev: Box::new(dev),
1106 jail,
1107 })
1108}
1109
/// Creates a virtio video device of kind `typ` using `backend`, plus its jail (if `simple_jail`
/// provides one).
///
/// When jailed, the jail gets a tmpfs root with the device nodes, sysfs directories and sockets
/// needed by the video stack bind mounted into it. `resource_bridge` is handed to the device.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    backend: VideoBackendType,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let jail = simple_jail(cfg, "video_device")?
        .map(|mut jail| -> Result<Minijail> {
            match typ {
                #[cfg(feature = "video-decoder")]
                devices::virtio::VideoDeviceType::Decoder => add_current_user_to_jail(&mut jail)?,
                #[cfg(feature = "video-encoder")]
                devices::virtio::VideoDeviceType::Encoder => add_current_user_to_jail(&mut jail)?,
            };

            // Create a tmpfs in the device's root directory so that we can bind mount files.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=67108864",
            )?;

            // Render node for libvda.
            #[cfg(feature = "libvda")]
            if backend == VideoBackendType::Libvda || backend == VideoBackendType::LibvdaVd {
                // follow the implementation at:
                // https://source.corp.google.com/chromeos_public/src/platform/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=90;bpv=0;cl=c06cc9cccb3cf3c7f9d2aec706c27c34cd6162a0
                const DRM_NUM_NODES: u32 = 63;
                const DRM_RENDER_NODE_START: u32 = 128;
                for offset in 0..DRM_NUM_NODES {
                    let node = format!("/dev/dri/renderD{}", DRM_RENDER_NODE_START + offset);
                    let node = Path::new(&node);
                    // Stop at the first render node that does not exist.
                    if !node.exists() {
                        break;
                    }
                    jail.mount_bind(node, node, false)?;
                }
            }

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            {
                // Device nodes used by libdrm through minigbm in libvda on AMD devices.
                let sys_dev_char_path = Path::new("/sys/dev/char");
                jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
                let sys_devices_path = Path::new("/sys/devices");
                jail.mount_bind(sys_devices_path, sys_devices_path, false)?;

                // Required for loading dri libraries loaded by minigbm on AMD devices.
                jail_mount_bind_if_exists(&mut jail, &["/usr/lib64"])?;
            }

            // Device nodes required by libchrome which establishes Mojo connection in libvda.
            let dev_urandom_path = Path::new("/dev/urandom");
            jail.mount_bind(dev_urandom_path, dev_urandom_path, false)?;
            let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
            jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;

            Ok(jail)
        })
        .transpose()?;

    let dev = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        backend,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
1185
/// Creates a video device of kind `typ` with `create_video_device` and appends it to `devs`.
///
/// `video_tube` is passed to the device as its resource bridge. Nothing is appended if creating
/// the device fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    backend: VideoBackendType,
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> Result<()> {
    let video_device = create_video_device(backend, cfg, typ, video_tube)?;
    devs.push(video_device);
    Ok(())
}
1197
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001198fn create_vhost_vsock_device(cfg: &Config, cid: u64) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001199 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann50f95912021-11-05 16:59:39 +01001200
1201 let device_file = match cfg
1202 .vhost_vsock_device
1203 .as_ref()
1204 .unwrap_or(&VhostVsockDeviceParameter::default())
1205 {
1206 VhostVsockDeviceParameter::Fd(fd) => {
1207 let fd = validate_raw_descriptor(*fd)
1208 .context("failed to validate fd for virtual socker device")?;
1209 // Safe because the `fd` is actually owned by this process and
1210 // we have a unique handle to it.
1211 unsafe { File::from_raw_fd(fd) }
1212 }
1213 VhostVsockDeviceParameter::Path(path) => OpenOptions::new()
1214 .read(true)
1215 .write(true)
1216 .custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
1217 .open(path)
1218 .context("failed to open virtual socket device")?,
1219 };
1220
1221 let dev = virtio::vhost::Vsock::new(device_file, features, cid)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001222 .context("failed to set up virtual socket device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001223
1224 Ok(VirtioDeviceStub {
1225 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001226 jail: simple_jail(cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001227 })
1228}
1229
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001230fn create_fs_device(
1231 cfg: &Config,
1232 uid_map: &str,
1233 gid_map: &str,
1234 src: &Path,
1235 tag: &str,
1236 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001237 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001238) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001239 let max_open_files =
1240 base::get_max_open_files().context("failed to get max number of open files")?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001241 let j = if cfg.sandbox {
1242 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1243 let config = SandboxConfig {
1244 limit_caps: false,
1245 uid_map: Some(uid_map),
1246 gid_map: Some(gid_map),
1247 log_failures: cfg.seccomp_log_failures,
1248 seccomp_policy: &seccomp_policy,
1249 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001250 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1251 // We want bind mounts from the parent namespaces to propagate into the fs device's
1252 // namespace.
1253 jail.set_remount_mode(libc::MS_SLAVE);
1254
1255 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001256 } else {
1257 create_base_minijail(src, Some(max_open_files), None)?
1258 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001259
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001260 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001261 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1262 // when num_queues > 1.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001263 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube)
1264 .context("failed to create fs device")?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001265
1266 Ok(VirtioDeviceStub {
1267 dev: Box::new(dev),
1268 jail: Some(j),
1269 })
1270}
1271
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001272fn create_9p_device(
1273 cfg: &Config,
1274 uid_map: &str,
1275 gid_map: &str,
1276 src: &Path,
1277 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001278 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001279) -> DeviceResult {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001280 let max_open_files =
1281 base::get_max_open_files().context("failed to get max number of open files")?;
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001282 let (jail, root) = if cfg.sandbox {
1283 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1284 let config = SandboxConfig {
1285 limit_caps: false,
1286 uid_map: Some(uid_map),
1287 gid_map: Some(gid_map),
1288 log_failures: cfg.seccomp_log_failures,
1289 seccomp_policy: &seccomp_policy,
1290 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001291
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001292 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1293 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1294 // namespace.
1295 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001296
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001297 // The shared directory becomes the root of the device's file system.
1298 let root = Path::new("/");
1299 (Some(jail), root)
1300 } else {
1301 // There's no mount namespace so we tell the server to treat the source directory as the
1302 // root.
1303 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001304 };
1305
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001306 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001307 p9_cfg.root = root.into();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001308 let dev = virtio::P9::new(features, tag, p9_cfg).context("failed to create 9p device")?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001309
1310 Ok(VirtioDeviceStub {
1311 dev: Box::new(dev),
1312 jail,
1313 })
1314}
1315
Jakub Starona3411ea2019-04-24 10:55:25 -07001316fn create_pmem_device(
1317 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001318 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001319 resources: &mut SystemAllocator,
1320 disk: &DiskOption,
1321 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001322 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001323) -> DeviceResult {
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09001324 let fd = open_file(&disk.path, disk.read_only, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001325 .with_context(|| format!("failed to load disk image {}", disk.path.display()))?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001326
1327 let (disk_size, arena_size) = {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001328 let metadata = std::fs::metadata(&disk.path).with_context(|| {
1329 format!("failed to get disk image {} metadata", disk.path.display())
1330 })?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001331 let disk_len = metadata.len();
1332 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1333 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1334 // we just align the size of the file to 2 MiB then access beyond the last page of the
1335 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1336 // padding up to 2 MiB.
1337 let alignment = 2 * 1024 * 1024;
1338 let align_adjust = if disk_len % alignment != 0 {
1339 alignment - (disk_len % alignment)
1340 } else {
1341 0
1342 };
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001343 (
1344 disk_len,
1345 disk_len
1346 .checked_add(align_adjust)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001347 .ok_or_else(|| anyhow!("pmem device image too big"))?,
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001348 )
Jakub Starona3411ea2019-04-24 10:55:25 -07001349 };
1350
1351 let protection = {
1352 if disk.read_only {
1353 Protection::read()
1354 } else {
1355 Protection::read_write()
1356 }
1357 };
1358
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001359 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001360 // Conversion from u64 to usize may fail on 32bit system.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001361 let arena_size = usize::try_from(arena_size).context("pmem device image too big")?;
1362 let disk_size = usize::try_from(disk_size).context("pmem device image too big")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001363
Daniel Verkamp6b298582021-08-16 15:37:11 -07001364 let mut arena =
1365 MemoryMappingArena::new(arena_size).context("failed to reserve pmem memory")?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001366 arena
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001367 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001368 .context("failed to reserve pmem memory")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001369
1370 // If the disk is not a multiple of the page size, the OS will fill the remaining part
1371 // of the page with zeroes. However, the anonymous mapping added below must start on a
1372 // page boundary, so round up the size before calculating the offset of the anon region.
1373 let disk_size = round_up_to_page_size(disk_size);
1374
1375 if arena_size > disk_size {
1376 // Add an anonymous region with the same protection as the disk mapping if the arena
1377 // size was aligned.
1378 arena
1379 .add_anon_protection(disk_size, arena_size - disk_size, protection)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001380 .context("failed to reserve pmem padding")?;
Daniel Verkamp3eeaf6a2021-08-30 15:29:44 -07001381 }
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001382 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001383 };
1384
1385 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001386 .mmio_allocator(MmioType::High)
Daniel Verkamp57e4f542021-10-28 09:56:40 -07001387 .reverse_allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001388 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001389 Alloc::PmemDevice(index),
1390 format!("pmem_disk_image_{}", index),
1391 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1392 128 * 1024 * 1024, /* 128 MiB */
1393 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001394 .context("failed to allocate memory for pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001395
Daniel Verkampe1980a92020-02-07 11:00:55 -08001396 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001397 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001398 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001399 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001400 /* read_only = */ disk.read_only,
1401 /* log_dirty_pages = */ false,
1402 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001403 .context("failed to add pmem device memory")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001404
Daniel Verkampe1980a92020-02-07 11:00:55 -08001405 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001406 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001407 fd,
1408 GuestAddress(mapping_address),
1409 slot,
1410 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001411 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001412 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001413 .context("failed to create pmem device")?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001414
1415 Ok(VirtioDeviceStub {
1416 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001417 jail: simple_jail(cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001418 })
1419}
1420
Zide Chendfc4b882021-03-10 16:35:37 -08001421fn create_iommu_device(
1422 cfg: &Config,
Zide Chen71435c12021-03-03 15:02:02 -08001423 phys_max_addr: u64,
Zide Chendfc4b882021-03-10 16:35:37 -08001424 endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1425) -> DeviceResult {
Zide Chen71435c12021-03-03 15:02:02 -08001426 let dev = virtio::Iommu::new(
1427 virtio::base_features(cfg.protected_vm),
1428 endpoints,
1429 phys_max_addr,
1430 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001431 .context("failed to create IOMMU device")?;
Zide Chendfc4b882021-03-10 16:35:37 -08001432
1433 Ok(VirtioDeviceStub {
1434 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001435 jail: simple_jail(cfg, "iommu_device")?,
Zide Chendfc4b882021-03-10 16:35:37 -08001436 })
1437}
1438
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001439fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001440 let mut keep_rds = Vec::new();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001441 let evt = Event::new().context("failed to create event")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001442 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001443 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001444 .context("failed to create console device")?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001445
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001446 let jail = match simple_jail(cfg, "serial")? {
Nicholas Verne71e73d82020-07-08 17:19:55 +10001447 Some(mut jail) => {
1448 // Create a tmpfs in the device's root directory so that we can bind mount the
1449 // log socket directory into it.
1450 // The size=67108864 is size=64*1024*1024 or size=64MB.
1451 jail.mount_with_data(
1452 Path::new("none"),
1453 Path::new("/"),
1454 "tmpfs",
1455 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1456 "size=67108864",
1457 )?;
Fergus Dall51200512021-08-19 12:54:26 +10001458 add_current_user_to_jail(&mut jail)?;
Nicholas Verne71e73d82020-07-08 17:19:55 +10001459 let res = param.add_bind_mounts(&mut jail);
1460 if res.is_err() {
1461 error!("failed to add bind mounts for console device");
1462 }
1463 Some(jail)
1464 }
1465 None => None,
1466 };
1467
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001468 Ok(VirtioDeviceStub {
1469 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001470 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001471 })
1472}
1473
/// Creates a virtio sound device stub for `path`, jailed with the "vios_audio_device" policy.
///
/// # Errors
///
/// Fails if the sound device or its jail cannot be created.
#[cfg(feature = "audio")]
fn create_sound_device(path: &Path, cfg: &Config) -> DeviceResult {
    let features = virtio::base_features(cfg.protected_vm);
    let sound = virtio::new_sound(path, features).context("failed to create sound device")?;
    let jail = simple_jail(cfg, "vios_audio_device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(sound),
        jail,
    })
}
1484
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001485// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001486#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001487fn create_virtio_devices(
1488 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001489 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001490 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001491 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001492 wayland_device_tube: Tube,
1493 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001494 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001495 balloon_device_tube: Tube,
1496 disk_device_tubes: &mut Vec<Tube>,
1497 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001498 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001499 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001500) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001501 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001502
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001503 for (_, param) in cfg
1504 .serial_parameters
1505 .iter()
1506 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1507 {
1508 let dev = create_console_device(cfg, param)?;
1509 devs.push(dev);
1510 }
1511
Zach Reizner8fb52112017-12-13 16:04:39 -08001512 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001513 let disk_device_tube = disk_device_tubes.remove(0);
1514 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001515 }
1516
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001517 for blk in &cfg.vhost_user_blk {
1518 devs.push(create_vhost_user_block_device(cfg, blk)?);
1519 }
1520
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +09001521 for console in &cfg.vhost_user_console {
1522 devs.push(create_vhost_user_console_device(cfg, console)?);
1523 }
1524
Jakub Starona3411ea2019-04-24 10:55:25 -07001525 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001526 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001527 devs.push(create_pmem_device(
1528 cfg,
1529 vm,
1530 resources,
1531 pmem_disk,
1532 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001533 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001534 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001535 }
1536
David Tolnay2b089fc2019-03-04 15:33:22 -08001537 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001538
David Tolnayde6b29a2018-12-20 11:49:46 -08001539 #[cfg(feature = "tpm")]
1540 {
David Tolnay43f8e212019-02-13 17:28:16 -08001541 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001542 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001543 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001544 }
1545
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001546 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
1547 devs.push(create_single_touch_device(
1548 cfg,
1549 single_touch_spec,
1550 idx as u32,
1551 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001552 }
1553
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001554 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
1555 devs.push(create_multi_touch_device(
1556 cfg,
1557 multi_touch_spec,
1558 idx as u32,
1559 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001560 }
1561
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001562 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
1563 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001564 }
1565
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001566 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
1567 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001568 }
1569
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001570 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
1571 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001572 }
1573
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001574 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
1575 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -08001576 }
1577
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001578 for dev_path in &cfg.virtio_input_evdevs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001579 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001580 }
1581
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001582 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001583
Zach Reizner39aa26b2017-12-12 18:03:23 -08001584 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001585 for tap_fd in &cfg.tap_fd {
Alexandre Courbot911773a2021-12-10 14:31:10 +09001586 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001587 }
1588
David Tolnay2b089fc2019-03-04 15:33:22 -08001589 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1590 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1591 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001592 if !cfg.vhost_user_net.is_empty() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001593 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
Keiichi Watanabe60686582021-03-12 04:53:51 +09001594 }
Alexandre Courbot911773a2021-12-10 14:31:10 +09001595 devs.push(create_net_device_from_config(
1596 cfg,
1597 host_ip,
1598 netmask,
1599 mac_address,
1600 )?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001601 }
1602
Alexandre Courbot993aa7f2021-12-09 14:51:29 +09001603 for tap_name in &cfg.tap_name {
1604 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
1605 }
1606
Keiichi Watanabe60686582021-03-12 04:53:51 +09001607 for net in &cfg.vhost_user_net {
1608 devs.push(create_vhost_user_net_device(cfg, net)?);
1609 }
1610
Chirantan Ekbote84091e52021-09-10 18:43:17 +09001611 for vsock in &cfg.vhost_user_vsock {
1612 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
1613 }
1614
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001615 for opt in &cfg.vhost_user_wl {
1616 devs.push(create_vhost_user_wl_device(cfg, opt)?);
1617 }
1618
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001619 #[cfg(feature = "gpu")]
1620 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
1621 devs.push(create_vhost_user_gpu_device(
1622 cfg,
1623 opt,
1624 host_tube,
1625 device_tube,
1626 )?);
1627 }
1628
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -07001629 for opt in &cfg.vvu_proxy {
1630 devs.push(create_vvu_proxy_device(cfg, opt)?);
1631 }
1632
David Tolnayfa701712019-02-13 16:42:54 -08001633 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001634 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001635
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001636 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001637 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001638 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001639
1640 #[cfg(feature = "gpu")]
1641 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001642 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001643 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001644 resource_bridges.push(gpu_socket);
1645 wl_resource_bridge = Some(wl_socket);
1646 }
1647 }
1648
1649 devs.push(create_wayland_device(
1650 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001651 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001652 wl_resource_bridge,
1653 )?);
1654 }
David Tolnayfa701712019-02-13 16:42:54 -08001655
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001656 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001657 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001658 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -07001659 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001660 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -07001661 } else {
1662 None
1663 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001664
1665 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001666 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001667 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -07001668 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001669 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -07001670 } else {
1671 None
1672 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001673
Zach Reizner3a8100a2017-09-13 19:15:43 -07001674 #[cfg(feature = "gpu")]
1675 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001676 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Jason Macnakd659a0d2021-03-15 15:33:01 -07001677 let mut gpu_display_w = DEFAULT_DISPLAY_WIDTH;
1678 let mut gpu_display_h = DEFAULT_DISPLAY_HEIGHT;
1679 if !gpu_parameters.displays.is_empty() {
1680 gpu_display_w = gpu_parameters.displays[0].width;
1681 gpu_display_h = gpu_parameters.displays[0].height;
1682 }
1683
Zach Reizner65b98f12019-11-22 17:34:58 -08001684 let mut event_devices = Vec::new();
1685 if cfg.display_window_mouse {
1686 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001687 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001688 let (multi_touch_width, multi_touch_height) = cfg
1689 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001690 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001691 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001692 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -07001693 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001694 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001695 // u32::MAX is the least likely to collide with the indices generated above for
1696 // the multi_touch options, which begin at 0.
1697 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001698 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001699 multi_touch_width,
1700 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001701 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001702 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001703 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001704 devs.push(VirtioDeviceStub {
1705 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001706 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001707 });
1708 event_devices.push(EventDevice::touchscreen(event_device_socket));
1709 }
1710 if cfg.display_window_keyboard {
1711 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001712 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001713 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001714 // u32::MAX is the least likely to collide with the indices generated above for
1715 // the multi_touch options, which begin at 0.
1716 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001717 virtio_dev_socket,
1718 virtio::base_features(cfg.protected_vm),
1719 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001720 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001721 devs.push(VirtioDeviceStub {
1722 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001723 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001724 });
1725 event_devices.push(EventDevice::keyboard(event_device_socket));
1726 }
Chia-I Wu16fb6592021-11-10 11:45:32 -08001727
1728 let mut render_server_fd = None;
1729 if let Some(ref render_server_parameters) = gpu_parameters.render_server {
1730 render_server_fd = Some(start_gpu_render_server(cfg, render_server_parameters)?);
1731 }
1732
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001733 devs.push(create_gpu_device(
1734 cfg,
1735 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001736 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001737 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001738 // Use the unnamed socket for GPU display screens.
1739 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001740 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -08001741 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -08001742 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001743 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001744 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001745 }
1746 }
1747
Chih-Yang Hsiae31731c2022-01-05 17:30:28 +08001748 #[cfg(feature = "audio_cras")]
1749 {
1750 for cras_snd in &cfg.cras_snds {
1751 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
1752 }
1753 }
1754
Daniel Verkampffb59122021-03-18 14:06:15 -07001755 #[cfg(feature = "video-decoder")]
1756 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001757 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -07001758 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001759 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -07001760 &mut devs,
1761 video_dec_tube,
1762 cfg,
1763 devices::virtio::VideoDeviceType::Decoder,
1764 )?;
1765 }
1766 }
1767
1768 #[cfg(feature = "video-encoder")]
1769 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001770 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -07001771 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +09001772 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -07001773 &mut devs,
1774 video_enc_tube,
1775 cfg,
1776 devices::virtio::VideoDeviceType::Encoder,
1777 )?;
1778 }
1779 }
1780
Zach Reizneraa575662018-08-15 10:46:32 -07001781 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +09001782 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001783 }
1784
Woody Chow5890b702021-02-12 14:57:02 +09001785 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001786 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +09001787 }
1788
Woody Chow1b16db12021-04-02 16:59:59 +09001789 #[cfg(feature = "audio")]
1790 for vhost_user_snd in &cfg.vhost_user_snd {
1791 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
1792 }
1793
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001794 for shared_dir in &cfg.shared_dirs {
1795 let SharedDir {
1796 src,
1797 tag,
1798 kind,
1799 uid_map,
1800 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001801 fs_cfg,
1802 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001803 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001804
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001805 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001806 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001807 let device_tube = fs_device_tubes.remove(0);
1808 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001809 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001810 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001811 };
1812 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001813 }
1814
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001815 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
1816 devs.push(create_vhost_user_mac80211_hwsim_device(
1817 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001818 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +09001819 )?);
1820 }
1821
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001822 #[cfg(feature = "audio")]
1823 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001824 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -07001825 }
1826
David Tolnay2b089fc2019-03-04 15:33:22 -08001827 Ok(devs)
1828}
1829
Xiong Zhang10f15052021-04-08 17:23:33 +08001830fn create_vfio_device(
1831 cfg: &Config,
1832 vm: &impl Vm,
1833 resources: &mut SystemAllocator,
1834 control_tubes: &mut Vec<TaggedControlTube>,
1835 vfio_path: &Path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001836 bus_num: Option<u8>,
Zide Chendfc4b882021-03-10 16:35:37 -08001837 endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1838 iommu_enabled: bool,
Xiong Zhang10f15052021-04-08 17:23:33 +08001839) -> DeviceResult<(Box<VfioPciDevice>, Option<Minijail>)> {
Zide Chendfc4b882021-03-10 16:35:37 -08001840 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001841 .context("failed to get vfio container")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001842
1843 // create MSI, MSI-X, and Mem request sockets for each vfio device
Daniel Verkamp6b298582021-08-16 15:37:11 -07001844 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1845 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001846 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
1847
Daniel Verkamp6b298582021-08-16 15:37:11 -07001848 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1849 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001850 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
1851
Daniel Verkamp6b298582021-08-16 15:37:11 -07001852 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1853 Tube::pair().context("failed to create tube")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001854 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1855
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001856 let hotplug = bus_num.is_some();
1857
Keiichi Watanabe7b805542021-09-03 02:13:51 +09001858 let vfio_device =
1859 VfioDevice::new_passthrough(&vfio_path, vm, vfio_container.clone(), iommu_enabled)
1860 .context("failed to create vfio device")?;
Xiong Zhang10f15052021-04-08 17:23:33 +08001861 let mut vfio_pci_device = Box::new(VfioPciDevice::new(
1862 vfio_device,
Xiong Zhange19ab752021-05-20 18:18:46 +08001863 bus_num,
Xiong Zhang10f15052021-04-08 17:23:33 +08001864 vfio_device_tube_msi,
1865 vfio_device_tube_msix,
1866 vfio_device_tube_mem,
1867 ));
1868 // early reservation for pass-through PCI devices.
Zide Chendfc4b882021-03-10 16:35:37 -08001869 let endpoint_addr = vfio_pci_device.allocate_address(resources);
1870 if endpoint_addr.is_err() {
Xiong Zhang10f15052021-04-08 17:23:33 +08001871 warn!(
1872 "address reservation failed for vfio {}",
1873 vfio_pci_device.debug_label()
1874 );
1875 }
1876
Zide Chendfc4b882021-03-10 16:35:37 -08001877 if iommu_enabled {
1878 endpoints.insert(endpoint_addr.unwrap().to_u32(), vfio_container);
1879 }
1880
Xiong Zhange2ff2c42021-06-02 16:49:50 +08001881 if hotplug {
1882 Ok((vfio_pci_device, None))
1883 } else {
1884 Ok((vfio_pci_device, simple_jail(cfg, "vfio_device")?))
1885 }
Xiong Zhang10f15052021-04-08 17:23:33 +08001886}
1887
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001888fn create_vfio_platform_device(
1889 cfg: &Config,
1890 vm: &impl Vm,
1891 _resources: &mut SystemAllocator,
1892 control_tubes: &mut Vec<TaggedControlTube>,
1893 vfio_path: &Path,
1894 _endpoints: &mut BTreeMap<u32, Arc<Mutex<VfioContainer>>>,
1895 iommu_enabled: bool,
1896) -> DeviceResult<(VfioPlatformDevice, Option<Minijail>)> {
1897 let vfio_container = VfioCommonSetup::vfio_get_container(vfio_path, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001898 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001899
Daniel Verkamp6b298582021-08-16 15:37:11 -07001900 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1901 Tube::pair().context("failed to create tube")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001902 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
1903
Keiichi Watanabe7b805542021-09-03 02:13:51 +09001904 let vfio_device = VfioDevice::new_passthrough(&vfio_path, vm, vfio_container, iommu_enabled)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001905 .context("Failed to create vfio device")?;
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001906 let vfio_plat_dev = VfioPlatformDevice::new(vfio_device, vfio_device_tube_mem);
1907
1908 Ok((vfio_plat_dev, simple_jail(cfg, "vfio_platform_device")?))
1909}
1910
David Tolnay2b089fc2019-03-04 15:33:22 -08001911fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001912 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001913 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001914 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001915 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -08001916 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001917 control_tubes: &mut Vec<TaggedControlTube>,
1918 wayland_device_tube: Tube,
1919 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001920 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001921 balloon_device_tube: Tube,
1922 disk_device_tubes: &mut Vec<Tube>,
1923 pmem_device_tubes: &mut Vec<Tube>,
1924 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001925 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001926 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001927) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001928 let stubs = create_virtio_devices(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001929 cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001930 vm,
1931 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001932 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001933 wayland_device_tube,
1934 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001935 vhost_user_gpu_tubes,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001936 balloon_device_tube,
1937 disk_device_tubes,
1938 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001939 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001940 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001941 )?;
1942
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001943 let mut devices = Vec::new();
David Tolnay2b089fc2019-03-04 15:33:22 -08001944
1945 for stub in stubs {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001946 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001947 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001948 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001949 .context("failed to create virtio pci dev")?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001950 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
1951 devices.push((dev, stub.jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001952 }
1953
Andrew Scull1590e6f2020-03-18 18:00:47 +00001954 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001955 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001956 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
Daniel Verkamp6b298582021-08-16 15:37:11 -07001957 .context("failed to create ac97 device")?;
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07001958 let jail = simple_jail(cfg, dev.minijail_policy())?;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001959 devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001960 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001961
Daniel Verkampf1439d42021-05-21 13:55:10 -07001962 #[cfg(feature = "usb")]
1963 {
1964 // Create xhci controller.
1965 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001966 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
Daniel Verkampf1439d42021-05-21 13:55:10 -07001967 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001968
Zide Chen5deee482021-04-19 11:06:01 -07001969 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -08001970 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
1971 BTreeMap::new();
1972
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001973 for vfio_dev in cfg
1974 .vfio
1975 .iter()
1976 .filter(|dev| dev.get_type() == VfioType::Pci)
1977 {
1978 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -07001979 let (vfio_pci_device, jail) = create_vfio_device(
1980 cfg,
1981 vm,
1982 resources,
1983 control_tubes,
1984 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001985 None,
Zide Chendfc4b882021-03-10 16:35:37 -08001986 &mut iommu_attached_endpoints,
Tomasz Nowicki71aca792021-06-09 18:53:49 +00001987 vfio_dev.iommu_enabled(),
Zide Chen5deee482021-04-19 11:06:01 -07001988 )?;
Zide Chendfc4b882021-03-10 16:35:37 -08001989
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001990 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -07001991 }
Zide Chendfc4b882021-03-10 16:35:37 -08001992
Tomasz Nowicki344eb142021-09-22 05:51:58 +00001993 for vfio_dev in cfg
1994 .vfio
1995 .iter()
1996 .filter(|dev| dev.get_type() == VfioType::Platform)
1997 {
1998 let vfio_path = &vfio_dev.vfio_path;
1999 let (vfio_plat_dev, jail) = create_vfio_platform_device(
2000 cfg,
2001 vm,
2002 resources,
2003 control_tubes,
2004 vfio_path.as_path(),
2005 &mut iommu_attached_endpoints,
2006 false, // Virtio IOMMU is not supported yet
2007 )?;
2008
2009 devices.push((Box::new(vfio_plat_dev), jail));
2010 }
2011
Zide Chendfc4b882021-03-10 16:35:37 -08002012 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -08002013 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -08002014
Daniel Verkamp6b298582021-08-16 15:37:11 -07002015 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -08002016 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -07002017 let mut dev =
2018 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002019 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07002020 // early reservation for viommu.
2021 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002022 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -07002023 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002024 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -08002025 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08002026 }
2027
Mattias Nisslerde2c6402021-10-21 12:05:29 +00002028 for params in &cfg.stub_pci_devices {
2029 // Stub devices don't need jailing since they don't do anything.
2030 devices.push((Box::new(StubPciDevice::new(params)), None));
2031 }
2032
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002033 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -08002034}
2035
2036#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09002037#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08002038struct Ids {
2039 uid: uid_t,
2040 gid: gid_t,
2041}
2042
David Tolnay48c48292019-03-01 16:54:25 -08002043// Set the uid/gid for the jailed process and give a basic id map. This is
2044// required for bind mounts to work.
Fergus Dall51200512021-08-19 12:54:26 +10002045fn add_current_user_to_jail(jail: &mut Minijail) -> Result<Ids> {
2046 let crosvm_uid = geteuid();
2047 let crosvm_gid = getegid();
David Tolnay48c48292019-03-01 16:54:25 -08002048
David Tolnay48c48292019-03-01 16:54:25 -08002049 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002050 .context("error setting UID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08002051 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002052 .context("error setting GID map")?;
David Tolnay48c48292019-03-01 16:54:25 -08002053
Chirantan Ekbotee1663ee2021-09-03 18:31:25 +09002054 if crosvm_uid != 0 {
2055 jail.change_uid(crosvm_uid);
2056 }
2057 if crosvm_gid != 0 {
2058 jail.change_gid(crosvm_gid);
2059 }
Fergus Dall51200512021-08-19 12:54:26 +10002060
David Tolnay41a6f842019-03-01 16:18:44 -08002061 Ok(Ids {
2062 uid: crosvm_uid,
2063 gid: crosvm_gid,
2064 })
David Tolnay48c48292019-03-01 16:54:25 -08002065}
2066
Chia-I Wu16fb6592021-11-10 11:45:32 -08002067fn add_current_user_as_root_to_jail(jail: &mut Minijail) -> Result<Ids> {
2068 let crosvm_uid = geteuid();
2069 let crosvm_gid = getegid();
2070 jail.uidmap(&format!("0 {0} 1", crosvm_uid))
2071 .context("error setting UID map")?;
2072 jail.gidmap(&format!("0 {0} 1", crosvm_gid))
2073 .context("error setting GID map")?;
2074
2075 Ok(Ids {
2076 uid: crosvm_uid,
2077 gid: crosvm_gid,
2078 })
2079}
2080
Zach Reizner65b98f12019-11-22 17:34:58 -08002081trait IntoUnixStream {
2082 fn into_unix_stream(self) -> Result<UnixStream>;
2083}
2084
2085impl<'a> IntoUnixStream for &'a Path {
2086 fn into_unix_stream(self) -> Result<UnixStream> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002087 if let Some(fd) = safe_descriptor_from_path(self).context("failed to open event device")? {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002088 Ok(fd.into())
Zach Reizner65b98f12019-11-22 17:34:58 -08002089 } else {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002090 UnixStream::connect(self).context("failed to open event device")
Zach Reizner65b98f12019-11-22 17:34:58 -08002091 }
2092 }
2093}
2094impl<'a> IntoUnixStream for &'a PathBuf {
2095 fn into_unix_stream(self) -> Result<UnixStream> {
2096 self.as_path().into_unix_stream()
2097 }
2098}
2099
2100impl IntoUnixStream for UnixStream {
2101 fn into_unix_stream(self) -> Result<UnixStream> {
2102 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08002103 }
2104}
2105
Steven Richmanf32d0b42020-06-20 21:45:32 -07002106fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
2107 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07002108 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002109 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07002110 // Our signal handler does nothing and is trivially async signal safe.
2111 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002112 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002113 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07002114 block_signal(SIGRTMIN() + 0).context("failed to block signal")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002115 } else {
2116 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05002117 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002118 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07002119 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002120 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002121 .context("error registering signal handler")?;
Matt Delco84cf9c02019-10-07 22:38:13 -07002122 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002123 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002124 Ok(())
2125}
2126
Steven Richmanf32d0b42020-06-20 21:45:32 -07002127// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07002128fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002129 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002130 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002131 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002132 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07002133 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002134 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002135 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002136 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002137 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002138 has_bios: bool,
2139 use_hypervisor_signals: bool,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002140 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002141 host_cpu_topology: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07002142) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07002143where
Zach Reizner2c770e62020-09-30 16:49:59 -07002144 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002145{
Zach Reizner304e7312020-09-29 16:00:24 -07002146 let mut vcpu = match vcpu {
2147 Some(v) => v,
2148 None => {
2149 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
2150 // the vcpu thread.
2151 match vm
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002152 .create_vcpu(kvm_vcpu_id)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002153 .context("failed to create vcpu")?
Zach Reizner304e7312020-09-29 16:00:24 -07002154 .downcast::<V>()
2155 {
2156 Ok(v) => *v,
2157 Err(_) => panic!("VM created wrong type of VCPU"),
2158 }
2159 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002160 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002161
Steven Richmanf32d0b42020-06-20 21:45:32 -07002162 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07002163 .add_vcpu(cpu_id, &vcpu)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002164 .context("failed to add vcpu to irq chip")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002165
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07002166 if !vcpu_affinity.is_empty() {
2167 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
2168 error!("Failed to set CPU affinity: {}", e);
2169 }
2170 }
2171
Steven Richmanf32d0b42020-06-20 21:45:32 -07002172 Arch::configure_vcpu(
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08002173 &vm,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002174 vm.get_hypervisor(),
2175 irq_chip,
2176 &mut vcpu,
2177 cpu_id,
2178 vcpu_count,
2179 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002180 no_smt,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002181 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002182 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002183 .context("failed to configure vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002184
Yusuke Sato31e136a2021-08-18 11:51:38 -07002185 if !enable_per_vm_core_scheduling {
2186 // Do per-vCPU core scheduling by setting a unique cookie to each vCPU.
2187 if let Err(e) = enable_core_scheduling() {
2188 error!("Failed to enable core scheduling: {}", e);
2189 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002190 }
2191
Kansho Nishidaab205af2020-08-13 18:17:50 +09002192 if run_rt {
2193 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2194 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
2195 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
2196 {
2197 warn!("Failed to set vcpu to real time: {}", e);
2198 }
2199 }
2200
Steven Richmanf32d0b42020-06-20 21:45:32 -07002201 if use_hypervisor_signals {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002202 let mut v = get_blocked_signals().context("failed to retrieve signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002203 v.retain(|&x| x != SIGRTMIN() + 0);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002204 vcpu.set_signal_mask(&v)
2205 .context("failed to set the signal mask for vcpu")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002206 }
2207
Zach Reizner2c770e62020-09-30 16:49:59 -07002208 let vcpu_run_handle = vcpu
2209 .take_run_handle(Some(SIGRTMIN() + 0))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002210 .context("failed to set thread id for vcpu")?;
Zach Reizner2c770e62020-09-30 16:49:59 -07002211
2212 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03002213}
2214
// Executes one GDB request `d` against `vcpu` and reports the outcome on `reply_tube`.
//
// Register reads and memory reads reply with the data that was read; every other request replies
// with `VcpuDebugStatus::CommandComplete`. A failed memory read is not an error: it replies with
// an empty memory region. Any other failed request returns an error and sends no reply. The
// reply is tagged with `cpu_id`.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu)
                .context("failed to handle a gdb ReadRegs command")?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs)
                .context("failed to handle a gdb WriteRegs command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            // A failed read is deliberately reported as an empty region rather than an error.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .context("failed to handle a gdb WriteMem command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu)
                .context("failed to handle a gdb EnableSingleStep command")?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs)
                .context("failed to handle a gdb SetHwBreakPoint command")?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .context("failed to send a debug status to GDB thread")
}
2293
Zach Reizner2c770e62020-09-30 16:49:59 -07002294fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002295 cpu_id: usize,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002296 kvm_vcpu_id: usize,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002297 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002298 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07002299 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002300 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002301 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002302 vcpu_affinity: Vec<usize>,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002303 delay_rt: bool,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002304 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002305 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002306 has_bios: bool,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002307 mut io_bus: devices::Bus,
2308 mut mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07002309 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002310 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002311 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002312 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002313 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002314 mpsc::Sender<VcpuDebugStatusMessage>,
2315 >,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002316 enable_per_vm_core_scheduling: bool,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002317 host_cpu_topology: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002318) -> Result<JoinHandle<()>>
2319where
Zach Reizner2c770e62020-09-30 16:49:59 -07002320 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002321{
Zach Reizner8fb52112017-12-13 16:04:39 -08002322 thread::Builder::new()
2323 .name(format!("crosvm_vcpu{}", cpu_id))
2324 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002325 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2326 // implementation accomplishes that.
2327 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2328
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002329 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2330 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002331 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002332 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002333 kvm_vcpu_id,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002334 vcpu,
2335 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002336 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002337 vcpu_count,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002338 run_rt && !delay_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002339 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002340 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002341 has_bios,
2342 use_hypervisor_signals,
Yusuke Sato31e136a2021-08-18 11:51:38 -07002343 enable_per_vm_core_scheduling,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002344 host_cpu_topology,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002345 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002346
Zach Reizner8fb52112017-12-13 16:04:39 -08002347 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002348
Zach Reizner2c770e62020-09-30 16:49:59 -07002349 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002350 Ok(v) => v,
2351 Err(e) => {
Maciek Swiechc3011222021-11-24 21:01:04 +00002352 error!("failed to start vcpu {}: {:#}", cpu_id, e);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002353 return;
2354 }
2355 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002356
Dylan Reidb0492662019-05-17 14:50:13 -07002357 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002358 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002359 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002360 // Wait until a GDB client attaches
2361 run_mode = VmRunMode::Breakpoint;
2362 }
2363
Dylan Reidb0492662019-05-17 14:50:13 -07002364 let mut interrupted_by_signal = false;
2365
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07002366 mmio_bus.set_access_id(cpu_id);
2367 io_bus.set_access_id(cpu_id);
2368
Dylan Reidb0492662019-05-17 14:50:13 -07002369 'vcpu_loop: loop {
2370 // Start by checking for messages to process and the run state of the CPU.
2371 // An extra check here for Running so there isn't a need to call recv unless a
2372 // message is likely to be ready because a signal was sent.
2373 if interrupted_by_signal || run_mode != VmRunMode::Running {
2374 'state_loop: loop {
2375 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002376 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002377 Ok(m) => m,
2378 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2379 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002380 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002381 break 'state_loop;
2382 }
2383 Err(mpsc::TryRecvError::Empty) => {
2384 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002385 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002386 Ok(m) => m,
2387 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002388 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002389 break 'vcpu_loop;
2390 }
2391 }
2392 }
2393 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002394 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002395 break 'vcpu_loop;
2396 }
2397 };
2398
2399 // Collect all pending messages.
2400 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002401 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002402
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002403 for msg in messages {
2404 match msg {
2405 VcpuControl::RunState(new_mode) => {
2406 run_mode = new_mode;
2407 match run_mode {
2408 VmRunMode::Running => break 'state_loop,
2409 VmRunMode::Suspending => {
2410 // On KVM implementations that use a paravirtualized
2411 // clock (e.g. x86), a flag must be set to indicate to
2412 // the guest kernel that a vCPU was suspended. The guest
2413 // kernel will use this flag to prevent the soft lockup
2414 // detection from triggering when this vCPU resumes,
2415 // which could happen days later in realtime.
2416 if requires_pvclock_ctrl {
2417 if let Err(e) = vcpu.pvclock_ctrl() {
2418 error!(
2419 "failed to tell hypervisor vcpu {} is suspending: {}",
2420 cpu_id, e
2421 );
2422 }
2423 }
2424 }
2425 VmRunMode::Breakpoint => {}
2426 VmRunMode::Exiting => break 'vcpu_loop,
2427 }
2428 }
2429 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2430 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002431 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002432 Some(ref ch) => {
2433 if let Err(e) = handle_debug_msg(
Daniel Verkamp166d1dd2021-08-19 17:05:29 -07002434 cpu_id, &vcpu, &guest_mem, d, ch,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002435 ) {
2436 error!("Failed to handle gdb message: {}", e);
2437 }
2438 },
2439 None => {
2440 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002441 }
2442 }
2443 }
Suleiman Souhlal2ac78b92021-02-01 12:33:26 +09002444 VcpuControl::MakeRT => {
2445 if run_rt && delay_rt {
2446 info!("Making vcpu {} RT\n", cpu_id);
2447 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
2448 if let Err(e) = set_rt_prio_limit(
2449 u64::from(DEFAULT_VCPU_RT_LEVEL))
2450 .and_then(|_|
2451 set_rt_round_robin(
2452 i32::from(DEFAULT_VCPU_RT_LEVEL)
2453 ))
2454 {
2455 warn!("Failed to set vcpu to real time: {}", e);
2456 }
2457 }
2458 }
Dylan Reidb0492662019-05-17 14:50:13 -07002459 }
2460 }
2461 }
2462 }
2463
2464 interrupted_by_signal = false;
2465
Steven Richman11dc6712020-09-02 15:39:14 -07002466 // Vcpus may have run a HLT instruction, which puts them into a state other than
2467 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2468 // until either the irqchip receives an interrupt for this vcpu, or until the main
2469 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2470 // implementations HLT instructions do not make it to crosvm, and thus this is a
2471 // no-op that always returns VcpuRunState::Runnable.
2472 match irq_chip.wait_until_runnable(&vcpu) {
2473 Ok(VcpuRunState::Runnable) => {}
2474 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2475 Err(e) => error!(
2476 "error waiting for vcpu {} to become runnable: {}",
2477 cpu_id, e
2478 ),
2479 }
2480
2481 if !interrupted_by_signal {
2482 match vcpu.run(&vcpu_run_handle) {
2483 Ok(VcpuExit::IoIn { port, mut size }) => {
2484 let mut data = [0; 8];
2485 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002486 error!("unsupported IoIn size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002487 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002488 }
Steven Richman11dc6712020-09-02 15:39:14 -07002489 io_bus.read(port as u64, &mut data[..size]);
2490 if let Err(e) = vcpu.set_data(&data[..size]) {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002491 error!("failed to set return data for IoIn at port {:#x}: {}", port, e);
Steven Richman11dc6712020-09-02 15:39:14 -07002492 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002493 }
Steven Richman11dc6712020-09-02 15:39:14 -07002494 Ok(VcpuExit::IoOut {
2495 port,
2496 mut size,
2497 data,
2498 }) => {
2499 if size > data.len() {
Dmitry Torokhova0410682021-08-01 10:40:50 -07002500 error!("unsupported IoOut size of {} bytes at port {:#x}", size, port);
Steven Richman11dc6712020-09-02 15:39:14 -07002501 size = data.len();
2502 }
2503 io_bus.write(port as u64, &data[..size]);
2504 }
2505 Ok(VcpuExit::MmioRead { address, size }) => {
2506 let mut data = [0; 8];
2507 mmio_bus.read(address, &mut data[..size]);
2508 // Setting data for mmio can not fail.
2509 let _ = vcpu.set_data(&data[..size]);
2510 }
2511 Ok(VcpuExit::MmioWrite {
2512 address,
2513 size,
2514 data,
2515 }) => {
2516 mmio_bus.write(address, &data[..size]);
2517 }
2518 Ok(VcpuExit::IoapicEoi { vector }) => {
2519 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2520 error!(
2521 "failed to broadcast eoi {} on vcpu {}: {}",
2522 vector, cpu_id, e
2523 );
2524 }
2525 }
2526 Ok(VcpuExit::IrqWindowOpen) => {}
Leo Lai558460f2021-07-23 05:32:27 +00002527 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
Steven Richman11dc6712020-09-02 15:39:14 -07002528 Ok(VcpuExit::Shutdown) => break,
2529 Ok(VcpuExit::FailEntry {
2530 hardware_entry_failure_reason,
2531 }) => {
2532 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002533 break;
2534 }
Steven Richman11dc6712020-09-02 15:39:14 -07002535 Ok(VcpuExit::SystemEvent(_, _)) => break,
2536 Ok(VcpuExit::Debug { .. }) => {
2537 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2538 {
2539 let msg = VcpuDebugStatusMessage {
2540 cpu: cpu_id as usize,
2541 msg: VcpuDebugStatus::HitBreakPoint,
2542 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002543 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002544 if let Err(e) = ch.send(msg) {
2545 error!("failed to notify breakpoint to GDB thread: {}", e);
2546 break;
2547 }
2548 }
2549 run_mode = VmRunMode::Breakpoint;
2550 }
2551 }
2552 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2553 Err(e) => match e.errno() {
2554 libc::EINTR => interrupted_by_signal = true,
2555 libc::EAGAIN => {}
2556 _ => {
2557 error!("vcpu hit unknown error: {}", e);
2558 break;
2559 }
2560 },
2561 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002562 }
2563
2564 if interrupted_by_signal {
2565 if use_hypervisor_signals {
2566 // Try to clear the signal that we use to kick VCPU if it is pending before
2567 // attempting to handle pause requests.
2568 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2569 error!("failed to clear pending signal: {}", e);
2570 break;
2571 }
2572 } else {
2573 vcpu.set_immediate_exit(false);
2574 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002575 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002576
Steven Richman11dc6712020-09-02 15:39:14 -07002577 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2578 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2579 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002580 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002581 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002582 .context("failed to spawn VCPU thread")
Zach Reizner39aa26b2017-12-12 18:03:23 -08002583}
2584
Zach Reiznera90649a2021-03-31 12:56:08 -07002585fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002586 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002587 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002588 open_file(
2589 initrd_path,
2590 true, /*read_only*/
2591 false, /*O_DIRECT*/
2592 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002593 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002594 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002595 } else {
2596 None
2597 };
2598
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002599 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +01002600 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002601 open_file(
2602 kernel_path,
2603 true, /*read_only*/
2604 false, /*O_DIRECT*/
2605 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002606 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002607 ),
2608 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +09002609 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002610 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +01002611 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002612 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2613 };
2614
Will Deaconc48e7832021-07-30 19:03:06 +01002615 let swiotlb = if let Some(size) = cfg.swiotlb {
2616 Some(
2617 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002618 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002619 )
2620 } else {
2621 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +00002622 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
2623 Some(64 * 1024 * 1024)
2624 }
Will Deaconc48e7832021-07-30 19:03:06 +01002625 ProtectionType::Unprotected => None,
2626 }
2627 };
2628
Zach Reiznera90649a2021-03-31 12:56:08 -07002629 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002630 memory_size: cfg
2631 .memory
2632 .unwrap_or(256)
2633 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002634 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +01002635 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -07002636 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002637 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002638 cpu_clusters: cfg.cpu_clusters.clone(),
2639 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002640 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002641 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002642 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002643 android_fstab: cfg
2644 .android_fstab
2645 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002646 .map(|x| {
2647 File::open(x)
2648 .with_context(|| format!("failed to open android fstab file {}", x.display()))
2649 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002650 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002651 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002652 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002653 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -07002654 acpi_sdts: cfg
2655 .acpi_tables
2656 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -07002657 .map(|path| {
2658 SDT::from_file(path)
2659 .with_context(|| format!("failed to open ACPI file {}", path.display()))
2660 })
Tomasz Jeznach42644642020-05-20 23:27:59 -07002661 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002662 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +09002663 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002664 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002665 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002666 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002667 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002668 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002669 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -07002670 })
2671}
2672
/// Outcome reported by [`run_config`] once a VM run has finished.
///
/// The variants mirror the two separate events (`reset_evt` / `exit_evt`) that the run loop
/// waits on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitState {
    /// The run ended via the reset path.
    // NOTE(review): presumably the caller is expected to start the VM again - confirm at call site.
    Reset,
    /// The run ended via the normal exit path.
    Stop,
}
2677
2678pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -07002679 let components = setup_vm_components(&cfg)?;
2680
2681 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002682 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
2683 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002684 let mut mem_policy = MemoryPolicy::empty();
2685 if components.hugepages {
2686 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2687 }
Quentin Perret26203802021-12-02 09:48:43 +00002688 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -07002689 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +00002690 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Daniel Verkamp6b298582021-08-16 15:37:11 -07002691 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002692
2693 enum KvmIrqChip {
2694 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2695 Split(KvmSplitIrqChip),
2696 Kernel(KvmKernelIrqChip),
2697 }
2698
2699 impl KvmIrqChip {
2700 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2701 match self {
2702 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2703 KvmIrqChip::Split(i) => i,
2704 KvmIrqChip::Kernel(i) => i,
2705 }
2706 }
2707 }
2708
2709 let ioapic_host_tube;
2710 let mut irq_chip = if cfg.split_irqchip {
2711 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2712 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2713 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2714 {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002715 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -07002716 ioapic_host_tube = Some(host_tube);
2717 KvmIrqChip::Split(
2718 KvmSplitIrqChip::new(
2719 vm_clone,
2720 components.vcpu_count,
2721 ioapic_device_tube,
2722 Some(120),
2723 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002724 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002725 )
2726 }
2727 } else {
2728 ioapic_host_tube = None;
2729 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002730 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
2731 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002732 )
2733 };
2734
2735 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2736}
2737
2738fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002739 cfg: Config,
2740 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002741 mut vm: V,
2742 irq_chip: &mut dyn IrqChipArch,
2743 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002744) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07002745where
2746 Vcpu: VcpuArch + 'static,
2747 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002748{
2749 if cfg.sandbox {
2750 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2751 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2752 // access to those files will not be possible.
2753 info!("crosvm entering multiprocess mode");
2754 }
2755
Daniel Verkampf1439d42021-05-21 13:55:10 -07002756 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002757 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07002758 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002759
Zach Reiznera90649a2021-03-31 12:56:08 -07002760 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2761 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2762 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002763 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002764
Zach Reiznera60744b2019-02-13 17:33:32 -08002765 let control_server_socket = match &cfg.socket_path {
2766 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07002767 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002768 )),
2769 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002770 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002771
Zach Reiznera90649a2021-03-31 12:56:08 -07002772 let mut control_tubes = Vec::new();
2773
2774 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2775 if let Some(port) = cfg.gdb {
2776 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002777 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07002778 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2779 components.gdb = Some((port, gdb_control_tube));
2780 }
2781
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002782 for wl_cfg in &cfg.vhost_user_wl {
2783 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2784 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002785 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002786 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2787 }
2788
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002789 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
2790 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002791 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002792 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -07002793 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002794 device_tube,
2795 ));
2796 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
2797 }
2798
Daniel Verkamp6b298582021-08-16 15:37:11 -07002799 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002800 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002801 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Daniel Verkamp6b298582021-08-16 15:37:11 -07002802 let (balloon_host_tube, balloon_device_tube) = Tube::pair().context("failed to create tube")?;
Hikaru Nishidaaf3f3bb2021-05-21 12:03:54 +09002803 // Set recv timeout to avoid deadlock on sending BalloonControlCommand before guest is ready.
2804 balloon_host_tube
2805 .set_recv_timeout(Some(Duration::from_millis(100)))
Daniel Verkamp6b298582021-08-16 15:37:11 -07002806 .context("failed to create tube")?;
Dylan Reid059a1882018-07-23 17:58:09 -07002807
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002808 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002809 let mut disk_device_tubes = Vec::new();
2810 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002811 let disk_count = cfg.disks.len();
2812 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002813 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002814 disk_host_tubes.push(disk_host_tub);
2815 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002816 }
2817
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002818 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002819 let pmem_count = cfg.pmem_devices.len();
2820 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002821 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002822 pmem_device_tubes.push(pmem_device_tube);
2823 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002824 }
2825
Daniel Verkamp6b298582021-08-16 15:37:11 -07002826 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002827 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002828
Zach Reiznerdc748482021-04-14 13:59:30 -07002829 if let Some(ioapic_host_tube) = ioapic_host_tube {
2830 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2831 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002832
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002833 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002834 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09002835 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07002836 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09002837 Some(mut jail) => {
2838 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2839 #[cfg(feature = "power-monitor-powerd")]
2840 {
Fergus Dall51200512021-08-19 12:54:26 +10002841 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09002842
2843 // Create a tmpfs in the device's root directory so that we can bind mount files.
2844 jail.mount_with_data(
2845 Path::new("none"),
2846 Path::new("/"),
2847 "tmpfs",
2848 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2849 "size=67108864",
2850 )?;
2851
2852 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2853 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2854 }
2855 Some(jail)
2856 }
2857 None => None,
2858 };
2859 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002860 } else {
2861 (&cfg.battery_type, None)
2862 };
2863
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002864 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2865
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002866 let fs_count = cfg
2867 .shared_dirs
2868 .iter()
2869 .filter(|sd| sd.kind == SharedDirKind::FS)
2870 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002871 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002872 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002873 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002874 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2875 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002876 }
2877
Daniel Verkamp6b298582021-08-16 15:37:11 -07002878 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002879 let reset_evt = Event::new().context("failed to create event")?;
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08002880 let mut sys_allocator = Arch::create_system_allocator(&vm);
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002881
2882 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
2883 let ramoops_region = match &components.pstore {
2884 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07002885 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07002886 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002887 ),
2888 None => None,
2889 };
2890
Daniel Verkamp891ea3e2022-01-04 12:35:55 -08002891 let phys_max_addr = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002892 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07002893 &cfg,
2894 &mut vm,
2895 &mut sys_allocator,
2896 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08002897 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07002898 &mut control_tubes,
2899 wayland_device_tube,
2900 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09002901 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07002902 balloon_device_tube,
2903 &mut disk_device_tubes,
2904 &mut pmem_device_tubes,
2905 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002906 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002907 usb_provider,
2908 Arc::clone(&map_request),
2909 )?;
2910
Peter Fangc2bba082021-04-19 18:40:24 -07002911 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002912 for device in devices
2913 .iter_mut()
2914 .filter_map(|(dev, _)| dev.as_pci_device_mut())
2915 {
Peter Fangc2bba082021-04-19 18:40:24 -07002916 let sdts = device
2917 .generate_acpi(components.acpi_sdts)
2918 .or_else(|| {
2919 error!("ACPI table generation error");
2920 None
2921 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002922 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07002923 components.acpi_sdts = sdts;
2924 }
2925
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002926 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
2927 let mut kvm_vcpu_ids = Vec::new();
2928
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002929 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002930 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002931 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002932 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002933 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07002934 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002935 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002936 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002937 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002938 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09002939 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00002940 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07002941 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08002942 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06002943 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07002944 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08002945
Daniel Verkamp1286b482021-11-30 15:14:16 -08002946 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2947 {
2948 // Create Pcie Root Port
2949 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
2950 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
2951 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
2952 let sec_bus = (1..255)
2953 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
2954 .context("failed to find empty bus for Pci hotplug")?;
2955 let pci_bridge = Box::new(PciBridge::new(
2956 pcie_root_port.clone(),
2957 msi_device_tube,
2958 0,
2959 sec_bus,
2960 ));
2961 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
2962 .context("Failed to configure pci bridge device")?;
2963 linux.hotplug_bus.push(pcie_root_port);
2964 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08002965
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002966 #[cfg(feature = "direct")]
2967 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002968 let direct_io = Arc::new(
2969 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
2970 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002971 for range in pmio.ranges.iter() {
2972 linux
2973 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002974 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002975 .unwrap();
2976 }
2977 };
2978
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002979 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002980 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08002981 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002982 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08002983 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07002984 );
Xiong Zhang46471a02021-11-12 00:34:42 +08002985
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002986 for range in mmio.ranges.iter() {
2987 linux
2988 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09002989 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07002990 .unwrap();
2991 }
2992 };
2993
2994 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002995 let mut irqs = Vec::new();
2996
2997 #[cfg(feature = "direct")]
2998 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002999 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003000 warn!("irq {} already reserved.", irq);
3001 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07003002 let trigger = Event::new().context("failed to create event")?;
3003 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003004 linux
3005 .irq_chip
3006 .register_irq_event(*irq, &trigger, Some(&resample))
3007 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07003008 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
3009 .context("failed to enable interrupt forwarding")?;
3010 direct_irq
3011 .irq_enable(*irq)
3012 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003013 irqs.push(direct_irq);
3014 }
3015
3016 #[cfg(feature = "direct")]
3017 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07003018 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003019 warn!("irq {} already reserved.", irq);
3020 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07003021 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003022 linux
3023 .irq_chip
3024 .register_irq_event(*irq, &trigger, None)
3025 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07003026 let direct_irq = devices::DirectIrq::new(trigger, None)
3027 .context("failed to enable interrupt forwarding")?;
3028 direct_irq
3029 .irq_enable(*irq)
3030 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08003031 irqs.push(direct_irq);
3032 }
3033
Daniel Verkamp6b298582021-08-16 15:37:11 -07003034 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003035 run_control(
3036 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07003037 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003038 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08003039 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003040 control_tubes,
3041 balloon_host_tube,
3042 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003043 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003044 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003045 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003046 reset_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003047 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003048 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08003049 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003050 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08003051 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07003052}
3053
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003054fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
3055 linux: &RunnableLinuxVm<V, Vcpu>,
3056 host_addr: PciAddress,
3057) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
3058 for hp_bus in linux.hotplug_bus.iter() {
3059 if let Some(number) = hp_bus.lock().is_match(host_addr) {
3060 return Ok((hp_bus.clone(), number));
3061 }
3062 }
3063 Err(anyhow!("Failed to find a suitable hotplug bus"))
3064}
3065
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003066fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3067 linux: &mut RunnableLinuxVm<V, Vcpu>,
3068 sys_allocator: &mut SystemAllocator,
3069 cfg: &Config,
3070 control_tubes: &mut Vec<TaggedControlTube>,
3071 vfio_path: &Path,
3072) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003073 let host_os_str = vfio_path
3074 .file_name()
3075 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3076 let host_str = host_os_str
3077 .to_str()
3078 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3079 let host_addr = PciAddress::from_string(host_str);
3080
3081 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
3082
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003083 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
3084 let (vfio_pci_device, jail) = create_vfio_device(
3085 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003086 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003087 sys_allocator,
3088 control_tubes,
3089 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003090 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003091 &mut endpoints,
3092 false,
3093 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003094
3095 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003096 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003097
Daniel Verkamp6b298582021-08-16 15:37:11 -07003098 let host_os_str = vfio_path
3099 .file_name()
3100 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3101 let host_str = host_os_str
3102 .to_str()
3103 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003104 let host_addr = PciAddress::from_string(host_str);
3105 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003106 let mut hp_bus = hp_bus.lock();
3107 hp_bus.add_hotplug_device(host_key, pci_address);
3108 hp_bus.hot_plug(pci_address);
3109 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003110}
3111
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003112fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
3113 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08003114 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003115 vfio_path: &Path,
3116) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003117 let host_os_str = vfio_path
3118 .file_name()
3119 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
3120 let host_str = host_os_str
3121 .to_str()
3122 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003123 let host_addr = PciAddress::from_string(host_str);
3124 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003125 for hp_bus in linux.hotplug_bus.iter() {
3126 let mut hp_bus_lock = hp_bus.lock();
3127 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
3128 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08003129 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08003130 return Ok(());
3131 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003132 }
3133
Daniel Verkamp6b298582021-08-16 15:37:11 -07003134 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08003135}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08003136
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003137fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
3138 linux: &mut RunnableLinuxVm<V, Vcpu>,
3139 sys_allocator: &mut SystemAllocator,
3140 cfg: &Config,
3141 add_tubes: &mut Vec<TaggedControlTube>,
3142 vfio_path: &Path,
3143 add: bool,
3144) -> VmResponse {
3145 let ret = if add {
3146 add_vfio_device(linux, sys_allocator, cfg, add_tubes, vfio_path)
3147 } else {
3148 remove_vfio_device(linux, sys_allocator, vfio_path)
3149 };
3150
3151 match ret {
3152 Ok(()) => VmResponse::Ok,
3153 Err(e) => {
3154 error!("hanlde_vfio_command failure: {}", e);
3155 add_tubes.clear();
3156 VmResponse::Err(base::Error::new(libc::EINVAL))
3157 }
3158 }
3159}
3160
Daniel Verkamp29409802021-02-24 14:46:19 -08003161/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
3162/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07003163/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
3164/// loop.
3165fn kick_all_vcpus(
3166 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07003167 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08003168 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07003169) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003170 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08003171 if let Err(e) = tube.send(message.clone()) {
3172 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07003173 }
3174 let _ = handle.kill(SIGRTMIN() + 0);
3175 }
3176 irq_chip.kick_halted_vcpus();
3177}
3178
Zach Reiznerdc748482021-04-14 13:59:30 -07003179fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
3180 mut linux: RunnableLinuxVm<V, Vcpu>,
3181 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003182 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08003183 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003184 mut control_tubes: Vec<TaggedControlTube>,
3185 balloon_host_tube: Tube,
3186 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07003187 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07003188 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003189 reset_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07003190 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08003191 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08003192 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003193 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003194) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003195 #[derive(PollToken)]
3196 enum Token {
3197 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003198 Reset,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003199 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003200 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003201 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08003202 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07003203 VmControl { index: usize },
3204 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003205
Zach Reizner19ad1f32019-12-12 18:58:50 -08003206 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003207 .set_raw_mode()
3208 .expect("failed to set terminal raw mode");
3209
Michael Hoylee392c462020-10-07 03:29:24 -07003210 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07003211 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003212 (&reset_evt, Token::Reset),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003213 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07003214 (&sigchld_fd, Token::ChildSignal),
3215 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07003216 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07003217
Zach Reiznera60744b2019-02-13 17:33:32 -08003218 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07003219 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003220 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07003221 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003222 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003223 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07003224 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07003225 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003226 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003227 }
3228
Steven Richmanf32d0b42020-06-20 21:45:32 -07003229 let events = linux
3230 .irq_chip
3231 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07003232 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003233
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003234 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07003235 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003236 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003237 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003238 }
3239
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003240 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07003241 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003242 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07003243 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08003244
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003245 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3246 // Create a channel for GDB thread.
3247 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
3248 let (s, r) = mpsc::channel();
3249 (Some(s), Some(r))
3250 } else {
3251 (None, None)
3252 };
3253
Steven Richmanf32d0b42020-06-20 21:45:32 -07003254 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
3255 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07003256 let use_hypervisor_signals = !linux
3257 .vm
3258 .get_hypervisor()
3259 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07003260 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07003261
Zach Reizner304e7312020-09-29 16:00:24 -07003262 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00003263 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003264 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
3265 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07003266 // Enable core scheduling before creating vCPUs so that the cookie will be
3267 // shared by all vCPU threads.
3268 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
3269 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003270 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07003271 if let Err(e) = enable_core_scheduling() {
3272 error!("Failed to enable core scheduling: {}", e);
3273 }
3274 }
Daniel Verkamp94c35272019-09-12 13:31:30 -07003275 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07003276 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07003277 let vcpu_affinity = match linux.vcpu_affinity.clone() {
3278 Some(VcpuAffinity::Global(v)) => v,
3279 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
3280 None => Default::default(),
3281 };
Zach Reizner55a9e502018-10-03 10:22:32 -07003282 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07003283 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08003284 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07003285 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07003286 linux.vm.try_clone().context("failed to clone vm")?,
3287 linux
3288 .irq_chip
3289 .try_box_clone()
3290 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003291 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09003292 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07003293 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09003294 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09003295 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07003296 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003297 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07003298 (*linux.io_bus).clone(),
3299 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07003300 exit_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003301 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003302 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003303 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003304 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3305 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003306 cfg.per_vm_core_scheduling,
3307 cfg.host_cpu_topology,
Zach Reizner55a9e502018-10-03 10:22:32 -07003308 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003309 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003310 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003311
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003312 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3313 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003314 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003315 let to_vcpu_channels = vcpu_handles
3316 .iter()
3317 .map(|(_handle, channel)| channel.clone())
3318 .collect();
3319 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003320 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003321 to_vcpu_channels,
3322 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3323 );
3324 thread::Builder::new()
3325 .name("gdb".to_owned())
3326 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07003327 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003328 };
3329
Dylan Reid059a1882018-07-23 17:58:09 -07003330 vcpu_thread_barrier.wait();
3331
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003332 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09003333 let mut balloon_stats_id: u64 = 0;
3334
Michael Hoylee392c462020-10-07 03:29:24 -07003335 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003336 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003337 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003338 Ok(v) => v,
3339 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003340 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003341 break;
3342 }
3343 }
3344 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003345
Steven Richmanf32d0b42020-06-20 21:45:32 -07003346 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3347 warn!("can't deliver delayed irqs: {}", e);
3348 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003349
Zach Reiznera60744b2019-02-13 17:33:32 -08003350 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003351 for event in events.iter().filter(|e| e.is_readable) {
3352 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003353 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003354 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003355 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003356 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003357 Token::Reset => {
3358 info!("vcpu requested reset");
3359 exit_state = ExitState::Reset;
3360 break 'wait;
3361 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003362 Token::Suspend => {
3363 info!("VM requested suspend");
3364 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003365 kick_all_vcpus(
3366 &vcpu_handles,
3367 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003368 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003369 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003370 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003371 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003372 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07003373 while let Some(siginfo) =
3374 sigchld_fd.read().context("failed to create signalfd")?
3375 {
Zach Reizner3ba00982019-01-23 19:04:43 -08003376 let pid = siginfo.ssi_pid;
3377 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3378 Some(label) => format!("{} (pid {})", label, pid),
3379 None => format!("pid {}", pid),
3380 };
David Tolnayf5032762018-12-03 10:46:45 -08003381 error!(
3382 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003383 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003384 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003385 }
Michael Hoylee392c462020-10-07 03:29:24 -07003386 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003387 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003388 Token::IrqFd { index } => {
3389 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3390 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003391 }
3392 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003393 Token::VmControlServer => {
3394 if let Some(socket_server) = &control_server_socket {
3395 match socket_server.accept() {
3396 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003397 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003398 .add(
3399 &socket,
3400 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003401 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003402 },
3403 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07003404 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003405 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003406 }
3407 Err(e) => error!("failed to accept socket: {}", e),
3408 }
3409 }
3410 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003411 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003412 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003413 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003414 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003415 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003416 Ok(request) => {
3417 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003418 let response = match request {
3419 VmRequest::VfioCommand { vfio_path, add } => {
3420 handle_vfio_command(
3421 &mut linux,
3422 &mut sys_allocator,
3423 &cfg,
3424 &mut add_tubes,
3425 &vfio_path,
3426 add,
3427 )
3428 }
3429 _ => request.execute(
3430 &mut run_mode_opt,
3431 &balloon_host_tube,
3432 &mut balloon_stats_id,
3433 disk_host_tubes,
3434 #[cfg(feature = "usb")]
3435 Some(&usb_control_tube),
3436 #[cfg(not(feature = "usb"))]
3437 None,
3438 &mut linux.bat_control,
3439 &vcpu_handles,
3440 ),
3441 };
3442
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003443 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003444 error!("failed to send VmResponse: {}", e);
3445 }
3446 if let Some(run_mode) = run_mode_opt {
3447 info!("control socket changed run mode to {}", run_mode);
3448 match run_mode {
3449 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003450 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003451 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003452 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003453 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07003454 for dev in &linux.resume_notify_devices {
3455 dev.lock().resume_imminent();
3456 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003457 }
Steven Richman11dc6712020-09-02 15:39:14 -07003458 kick_all_vcpus(
3459 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003460 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003461 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003462 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003463 }
3464 }
3465 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003466 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003467 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003468 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003469 vm_control_indices_to_remove.push(index);
3470 } else {
3471 error!("failed to recv VmRequest: {}", e);
3472 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003473 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003474 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003475 TaggedControlTube::VmMemory(tube) => {
3476 match tube.recv::<VmMemoryRequest>() {
3477 Ok(request) => {
3478 let response = request.execute(
3479 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003480 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003481 Arc::clone(&map_request),
3482 &mut gralloc,
3483 );
3484 if let Err(e) = tube.send(&response) {
3485 error!("failed to send VmMemoryControlResponse: {}", e);
3486 }
3487 }
3488 Err(e) => {
3489 if let TubeError::Disconnected = e {
3490 vm_control_indices_to_remove.push(index);
3491 } else {
3492 error!("failed to recv VmMemoryControlRequest: {}", e);
3493 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003494 }
3495 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003496 }
3497 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003498 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003499 let response = {
3500 let irq_chip = &mut linux.irq_chip;
3501 request.execute(
3502 |setup| match setup {
3503 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003504 if let Some(event_index) = irq_chip
3505 .register_irq_event(irq, ev, None)?
3506 {
3507 match wait_ctx.add(
3508 ev,
3509 Token::IrqFd {
3510 index: event_index
3511 },
3512 ) {
3513 Err(e) => {
3514 warn!("failed to add IrqFd to poll context: {}", e);
3515 Err(e)
3516 },
3517 Ok(_) => {
3518 Ok(())
3519 }
3520 }
3521 } else {
3522 Ok(())
3523 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003524 }
3525 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08003526 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07003527 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003528 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003529 )
3530 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003531 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003532 error!("failed to send VmIrqResponse: {}", e);
3533 }
3534 }
3535 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003536 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003537 vm_control_indices_to_remove.push(index);
3538 } else {
3539 error!("failed to recv VmIrqRequest: {}", e);
3540 }
3541 }
3542 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003543 TaggedControlTube::VmMsync(tube) => {
3544 match tube.recv::<VmMsyncRequest>() {
3545 Ok(request) => {
3546 let response = request.execute(&mut linux.vm);
3547 if let Err(e) = tube.send(&response) {
3548 error!("failed to send VmMsyncResponse: {}", e);
3549 }
3550 }
3551 Err(e) => {
3552 if let TubeError::Disconnected = e {
3553 vm_control_indices_to_remove.push(index);
3554 } else {
3555 error!("failed to recv VmMsyncRequest: {}", e);
3556 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003557 }
3558 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003559 }
3560 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003561 Ok(request) => {
3562 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003563 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003564 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003565 error!("failed to send VmResponse: {}", e);
3566 }
3567 }
3568 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003569 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003570 vm_control_indices_to_remove.push(index);
3571 } else {
3572 error!("failed to recv VmResponse: {}", e);
3573 }
3574 }
3575 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003576 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003577 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08003578 if !add_tubes.is_empty() {
3579 for (idx, socket) in add_tubes.iter().enumerate() {
3580 wait_ctx
3581 .add(
3582 socket.as_ref(),
3583 Token::VmControl {
3584 index: idx + control_tubes.len(),
3585 },
3586 )
3587 .context(
3588 "failed to add hotplug vfio-pci descriptor ot wait context",
3589 )?;
3590 }
3591 control_tubes.append(&mut add_tubes);
3592 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003593 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003594 }
3595 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003596
Vikram Auradkarede68c72021-07-01 14:33:54 -07003597 // It's possible more data is readable and buffered while the socket is hung up,
3598 // so don't delete the tube from the poll context until we're sure all the
3599 // data is read.
3600 // The case below covers the condition where we have received a hangup event and the tube
3601 // is not readable.
3602 // For a readable tube, once all data is read, any attempt to read more data from the hung-up
3603 // tube should fail. On such a failure, we get a Disconnected error and the index is added to
3604 // `vm_control_indices_to_remove` by the time we reach here.
3605 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
3606 if let Token::VmControl { index } = event.token {
3607 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003608 }
3609 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003610
3611 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003612 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003613 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003614 vm_control_indices_to_remove.dedup();
3615 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003616 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3617 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003618 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003619 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003620 // that has already been closed. Because the token associated with that spurious event
3621 // now belongs to a different socket, the control loop will start to interact with
3622 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3623 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003624 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07003625 wait_ctx
3626 .delete(socket)
3627 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08003628 }
3629
3630 // This line implicitly drops the socket at `index` when it gets returned by
3631 // `swap_remove`. After this line, the socket at `index` is not the one from
3632 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003633 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003634 control_tubes.swap_remove(index);
3635 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003636 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003637 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07003638 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003639 }
3640 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003641 }
3642
Zach Reiznerdc748482021-04-14 13:59:30 -07003643 kick_all_vcpus(
3644 &vcpu_handles,
3645 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003646 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003647 );
Steven Richman11dc6712020-09-02 15:39:14 -07003648 for (handle, _) in vcpu_handles {
3649 if let Err(e) = handle.join() {
3650 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003651 }
3652 }
3653
Daniel Verkamp94c35272019-09-12 13:31:30 -07003654 // Explicitly drop the VM structure here to allow the devices to clean up before the
3655 // control sockets are closed when this function exits.
3656 mem::drop(linux);
3657
Zach Reizner19ad1f32019-12-12 18:58:50 -08003658 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003659 .set_canon_mode()
3660 .expect("failed to restore canonical mode for terminal");
3661
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08003662 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08003663}