blob: 21208ca1c445d74c3c47e30e22156994fea14133 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Daniel Verkamp5586ff52022-02-24 16:34:55 -08006use std::collections::{BTreeMap, BTreeSet};
Anton Romanov5acc0f52022-01-28 00:18:11 +00007use std::convert::TryInto;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::fs::{File, OpenOptions};
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00009use std::io::prelude::*;
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090010use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070011use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070012use std::mem;
Haiwei Li09b7b8e2022-02-18 18:16:05 +080013use std::ops::RangeInclusive;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000014#[cfg(feature = "gpu")]
15use std::os::unix::net::UnixStream;
16use std::os::unix::prelude::OpenOptionsExt;
Xiong Zhang626f0142022-03-12 16:05:17 +080017use std::path::Path;
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000021use std::process;
Anton Romanov5acc0f52022-01-28 00:18:11 +000022#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reizner39aa26b2017-12-12 18:03:23 -080023use std::thread;
Zach Reizner39aa26b2017-12-12 18:03:23 -080024
Alexandre Courbotc6ad83f2022-02-07 19:45:31 +090025use devices::virtio::vhost::vsock::{VhostVsockConfig, VhostVsockDeviceParameter};
Anton Romanov5acc0f52022-01-28 00:18:11 +000026use libc;
Zach Reizner39aa26b2017-12-12 18:03:23 -080027
Tomasz Jeznach42644642020-05-20 23:27:59 -070028use acpi_tables::sdt::SDT;
29
Daniel Verkamp6b298582021-08-16 15:37:11 -070030use anyhow::{anyhow, bail, Context, Result};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080031use base::*;
Daniel Verkamp578e7cc2022-03-01 22:34:52 -080032use base::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Anton Romanov5acc0f52022-01-28 00:18:11 +000033use devices::serial_device::SerialHardware;
Zide Chenafdb9382021-06-17 12:04:43 -070034use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow055b81b2022-01-25 18:34:29 +090035use devices::virtio::memory_mapper::MemoryMapperTrait;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000036#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000037use devices::virtio::{self, EventDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080038#[cfg(feature = "audio")]
39use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080040use devices::{
Anton Romanov5acc0f52022-01-28 00:18:11 +000041 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqEventIndex, KvmKernelIrqChip, PciAddress,
Xiong Zhang626f0142022-03-12 16:05:17 +080042 PciDevice, PvPanicCode, PvPanicPciDevice, StubPciDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080043};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080044use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070045#[cfg(feature = "usb")]
46use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070047use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Anton Romanov5acc0f52022-01-28 00:18:11 +000048use hypervisor::{HypervisorCap, ProtectionType, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070049use minijail::{self, Minijail};
Anton Romanov5acc0f52022-01-28 00:18:11 +000050use resources::{Alloc, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080051use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070052use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080053use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090054use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080055
Keiichi Watanabec5262e92020-10-21 15:57:33 +090056#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
57use crate::gdb::{gdb_thread, GdbStub};
Daniel Verkamp5586ff52022-02-24 16:34:55 -080058use crate::{Config, Executable, FileBackedMappingParameters, SharedDir, SharedDirKind, VfioType};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070059use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090060 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070061};
Sonny Raoed517d12018-02-13 22:09:43 -080062
Xiong Zhang626f0142022-03-12 16:05:17 +080063#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
64use {
65 crate::HostPcieRootPortParameters,
66 devices::{
67 IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip, PciBridge, PcieHostRootPort, PcieRootPort,
68 },
69 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
70 x86_64::X8664arch as Arch,
71};
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080072#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070073use {
74 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070075 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070076 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
77};
Zach Reizner39aa26b2017-12-12 18:03:23 -080078
Anton Romanov5acc0f52022-01-28 00:18:11 +000079mod device_helpers;
80use device_helpers::*;
Anton Romanovdb0f4d62022-03-23 21:24:29 +000081pub(crate) mod jail_helpers;
Anton Romanov5acc0f52022-01-28 00:18:11 +000082use jail_helpers::*;
83mod vcpu;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090084
David Tolnay2b089fc2019-03-04 15:33:22 -080085#[cfg(feature = "gpu")]
Anton Romanovdb0f4d62022-03-23 21:24:29 +000086pub(crate) mod gpu;
Chirantan Ekbote44292f52021-06-25 18:31:41 +090087#[cfg(feature = "gpu")]
Dmitry Torokhove464a7a2022-01-26 13:29:36 -080088pub use gpu::GpuRenderServerParameters;
89#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000090use gpu::*;
Jorge E. Moreirad4562d02021-06-28 16:21:12 -070091
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080092// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -070093#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -080094fn create_virtio_devices(
95 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -070096 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -070097 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -070098 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080099 wayland_device_tube: Tube,
100 gpu_device_tube: Tube,
Alexandre Courbote55b7912022-03-04 16:54:38 +0900101 vhost_user_gpu_tubes: Vec<(Tube, Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000102 balloon_device_tube: Option<Tube>,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800103 balloon_inflate_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900104 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800105 disk_device_tubes: &mut Vec<Tube>,
106 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800107 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800108 fs_device_tubes: &mut Vec<Tube>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800109 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800110 vvu_proxy_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800111) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700112 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800113
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900114 #[cfg(feature = "gpu")]
Alexandre Courbote55b7912022-03-04 16:54:38 +0900115 for (opt, (host_gpu_tube, device_gpu_tube, device_control_tube)) in
116 cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes)
117 {
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900118 devs.push(create_vhost_user_gpu_device(
119 cfg,
120 opt,
Alexandre Courbote55b7912022-03-04 16:54:38 +0900121 (host_gpu_tube, device_gpu_tube),
122 device_control_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900123 )?);
124 }
125
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700126 for opt in &cfg.vvu_proxy {
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800127 devs.push(create_vvu_proxy_device(
128 cfg,
129 opt,
130 vvu_proxy_device_tubes.remove(0),
131 )?);
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700132 }
133
David Tolnayfa701712019-02-13 16:42:54 -0800134 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800135 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900136
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900137 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900138 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800139 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900140
141 #[cfg(feature = "gpu")]
142 {
Jason Macnakcc7070b2019-11-06 14:48:12 -0800143 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700144 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900145 resource_bridges.push(gpu_socket);
146 wl_resource_bridge = Some(wl_socket);
147 }
148 }
149
150 devs.push(create_wayland_device(
151 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800152 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900153 wl_resource_bridge,
154 )?);
155 }
David Tolnayfa701712019-02-13 16:42:54 -0800156
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900157 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900158 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700159 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700160 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900161 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700162 } else {
163 None
164 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900165
166 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900167 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700168 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700169 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900170 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700171 } else {
172 None
173 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900174
Zach Reizner3a8100a2017-09-13 19:15:43 -0700175 #[cfg(feature = "gpu")]
176 {
Noah Golddc7f52b2020-02-01 13:01:58 -0800177 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Anton Romanov5acc0f52022-01-28 00:18:11 +0000178 let mut gpu_display_w = virtio::DEFAULT_DISPLAY_WIDTH;
179 let mut gpu_display_h = virtio::DEFAULT_DISPLAY_HEIGHT;
Jason Macnakd659a0d2021-03-15 15:33:01 -0700180 if !gpu_parameters.displays.is_empty() {
181 gpu_display_w = gpu_parameters.displays[0].width;
182 gpu_display_h = gpu_parameters.displays[0].height;
183 }
184
Zach Reizner65b98f12019-11-22 17:34:58 -0800185 let mut event_devices = Vec::new();
186 if cfg.display_window_mouse {
187 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700188 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000189 let (multi_touch_width, multi_touch_height) = cfg
190 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700191 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800192 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000193 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -0700194 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000195 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700196 // u32::MAX is the least likely to collide with the indices generated above for
197 // the multi_touch options, which begin at 0.
198 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800199 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000200 multi_touch_width,
201 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700202 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800203 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700204 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800205 devs.push(VirtioDeviceStub {
206 dev: Box::new(dev),
Alexandre Courbot6a8f6562022-03-24 14:43:48 +0900207 jail: simple_jail(&cfg.jail_config, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800208 });
209 event_devices.push(EventDevice::touchscreen(event_device_socket));
210 }
211 if cfg.display_window_keyboard {
212 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700213 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -0700214 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700215 // u32::MAX is the least likely to collide with the indices generated above for
216 // the multi_touch options, which begin at 0.
217 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700218 virtio_dev_socket,
219 virtio::base_features(cfg.protected_vm),
220 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700221 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800222 devs.push(VirtioDeviceStub {
223 dev: Box::new(dev),
Alexandre Courbot6a8f6562022-03-24 14:43:48 +0900224 jail: simple_jail(&cfg.jail_config, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800225 });
226 event_devices.push(EventDevice::keyboard(event_device_socket));
227 }
Chia-I Wu16fb6592021-11-10 11:45:32 -0800228
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700229 devs.push(create_gpu_device(
230 cfg,
231 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800232 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700233 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900234 // Use the unnamed socket for GPU display screens.
235 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700236 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -0800237 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -0800238 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800239 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700240 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -0700241 }
242 }
243
Richard Fung08289b12022-02-02 20:46:19 +0000244 for (_, param) in cfg
245 .serial_parameters
246 .iter()
247 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
248 {
249 let dev = create_console_device(cfg, param)?;
250 devs.push(dev);
251 }
252
253 for disk in &cfg.disks {
254 let disk_device_tube = disk_device_tubes.remove(0);
255 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
256 }
257
258 for blk in &cfg.vhost_user_blk {
259 devs.push(create_vhost_user_block_device(cfg, blk)?);
260 }
261
262 for console in &cfg.vhost_user_console {
263 devs.push(create_vhost_user_console_device(cfg, console)?);
264 }
265
266 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
267 let pmem_device_tube = pmem_device_tubes.remove(0);
268 devs.push(create_pmem_device(
269 cfg,
270 vm,
271 resources,
272 pmem_disk,
273 index,
274 pmem_device_tube,
275 )?);
276 }
277
Andrew Walbrana24a7522022-02-09 18:23:00 +0000278 if cfg.rng {
279 devs.push(create_rng_device(cfg)?);
280 }
Richard Fung08289b12022-02-02 20:46:19 +0000281
282 #[cfg(feature = "tpm")]
283 {
284 if cfg.software_tpm {
Daniel Verkamp29950ef2021-10-07 14:56:45 -0700285 devs.push(create_software_tpm_device(cfg)?);
Richard Fung08289b12022-02-02 20:46:19 +0000286 }
287 }
288
289 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
290 devs.push(create_single_touch_device(
291 cfg,
292 single_touch_spec,
293 idx as u32,
294 )?);
295 }
296
297 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
298 devs.push(create_multi_touch_device(
299 cfg,
300 multi_touch_spec,
301 idx as u32,
302 )?);
303 }
304
305 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
306 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
307 }
308
309 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
310 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
311 }
312
313 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
314 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
315 }
316
317 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
318 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
319 }
320
321 for dev_path in &cfg.virtio_input_evdevs {
322 devs.push(create_vinput_device(cfg, dev_path)?);
323 }
324
325 if let Some(balloon_device_tube) = balloon_device_tube {
326 devs.push(create_balloon_device(
327 cfg,
328 balloon_device_tube,
329 balloon_inflate_tube,
330 init_balloon_size,
331 )?);
332 }
333
334 // We checked above that if the IP is defined, then the netmask is, too.
335 for tap_fd in &cfg.tap_fd {
336 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
337 }
338
339 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
340 (cfg.host_ip, cfg.netmask, cfg.mac_address)
341 {
342 if !cfg.vhost_user_net.is_empty() {
343 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
344 }
345 devs.push(create_net_device_from_config(
346 cfg,
347 host_ip,
348 netmask,
349 mac_address,
350 )?);
351 }
352
353 for tap_name in &cfg.tap_name {
354 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
355 }
356
357 for net in &cfg.vhost_user_net {
358 devs.push(create_vhost_user_net_device(cfg, net)?);
359 }
360
361 for vsock in &cfg.vhost_user_vsock {
362 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
363 }
364
365 for opt in &cfg.vhost_user_wl {
366 devs.push(create_vhost_user_wl_device(cfg, opt)?);
367 }
368
Chih-Yang Hsiae31731c2022-01-05 17:30:28 +0800369 #[cfg(feature = "audio_cras")]
370 {
371 for cras_snd in &cfg.cras_snds {
372 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
373 }
374 }
375
Daniel Verkampffb59122021-03-18 14:06:15 -0700376 #[cfg(feature = "video-decoder")]
377 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900378 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700379 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900380 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700381 &mut devs,
382 video_dec_tube,
383 cfg,
384 devices::virtio::VideoDeviceType::Decoder,
385 )?;
386 }
387 }
388
389 #[cfg(feature = "video-encoder")]
390 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900391 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700392 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900393 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700394 &mut devs,
395 video_enc_tube,
396 cfg,
397 devices::virtio::VideoDeviceType::Encoder,
398 )?;
399 }
400 }
401
Zach Reizneraa575662018-08-15 10:46:32 -0700402 if let Some(cid) = cfg.cid {
Alexandre Courbotc6ad83f2022-02-07 19:45:31 +0900403 let vhost_config = VhostVsockConfig {
404 device: cfg
405 .vhost_vsock_device
406 .clone()
407 .unwrap_or(VhostVsockDeviceParameter::default()),
408 cid,
409 };
410 devs.push(create_vhost_vsock_device(cfg, &vhost_config)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700411 }
412
Woody Chow5890b702021-02-12 14:57:02 +0900413 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700414 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +0900415 }
416
Woody Chow1b16db12021-04-02 16:59:59 +0900417 #[cfg(feature = "audio")]
418 for vhost_user_snd in &cfg.vhost_user_snd {
419 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
420 }
421
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900422 for shared_dir in &cfg.shared_dirs {
423 let SharedDir {
424 src,
425 tag,
426 kind,
427 uid_map,
428 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900429 fs_cfg,
430 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900431 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -0800432
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900433 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900434 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800435 let device_tube = fs_device_tubes.remove(0);
436 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900437 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900438 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900439 };
440 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -0800441 }
442
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900443 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
444 devs.push(create_vhost_user_mac80211_hwsim_device(
445 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700446 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900447 )?);
448 }
449
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700450 #[cfg(feature = "audio")]
451 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700452 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700453 }
454
David Tolnay2b089fc2019-03-04 15:33:22 -0800455 Ok(devs)
456}
457
458fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -0600459 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -0700460 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -0700461 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -0700462 exit_evt: &Event,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +0000463 panic_wrtube: Tube,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800464 iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800465 control_tubes: &mut Vec<TaggedControlTube>,
466 wayland_device_tube: Tube,
467 gpu_device_tube: Tube,
Alexandre Courbote55b7912022-03-04 16:54:38 +0900468 // Tuple content: (host-side GPU tube, device-side GPU tube, device-side control tube).
469 vhost_user_gpu_tubes: Vec<(Tube, Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000470 balloon_device_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900471 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800472 disk_device_tubes: &mut Vec<Tube>,
473 pmem_device_tubes: &mut Vec<Tube>,
474 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -0700475 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800476 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800477 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800478 vvu_proxy_device_tubes: &mut Vec<Tube>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000479) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800480 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
481 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -0700482 if !cfg.vfio.is_empty() {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800483 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -0800484
Tomasz Nowicki71aca792021-06-09 18:53:49 +0000485 for vfio_dev in cfg
486 .vfio
487 .iter()
488 .filter(|dev| dev.get_type() == VfioType::Pci)
489 {
490 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -0700491 let (vfio_pci_device, jail) = create_vfio_device(
492 cfg,
493 vm,
494 resources,
495 control_tubes,
496 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +0800497 None,
Victor Ding3f749592022-03-18 05:44:20 +0000498 vfio_dev.guest_address(),
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800499 iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800500 Some(&mut coiommu_attached_endpoints),
501 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -0700502 )?;
Zide Chendfc4b882021-03-10 16:35:37 -0800503
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000504 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -0700505 }
Zide Chendfc4b882021-03-10 16:35:37 -0800506
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000507 for vfio_dev in cfg
508 .vfio
509 .iter()
510 .filter(|dev| dev.get_type() == VfioType::Platform)
511 {
512 let vfio_path = &vfio_dev.vfio_path;
513 let (vfio_plat_dev, jail) = create_vfio_platform_device(
514 cfg,
515 vm,
516 resources,
517 control_tubes,
518 vfio_path.as_path(),
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800519 iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800520 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000521 )?;
522
523 devices.push((Box::new(vfio_plat_dev), jail));
524 }
525
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +0800526 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
527 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
528 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
529 if res == 0 {
530 let limit = unsafe { buf.assume_init() };
531 let rlim_new = limit
532 .rlim_cur
533 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
534 let rlim_max = max(limit.rlim_max, rlim_new);
535 if limit.rlim_cur < rlim_new {
536 let limit_arg = libc::rlimit {
537 rlim_cur: rlim_new as libc::rlim_t,
538 rlim_max: rlim_max as libc::rlim_t,
539 };
540 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
541 if res != 0 {
542 bail!("Set rlimit failed");
543 }
544 }
545 } else {
546 bail!("Get rlimit failed");
547 }
548 }
549
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800550 if !coiommu_attached_endpoints.is_empty() {
551 let vfio_container =
552 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
553 .context("failed to get vfio container")?;
554 let (coiommu_host_tube, coiommu_device_tube) =
555 Tube::pair().context("failed to create coiommu tube")?;
556 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
557 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800558 let (coiommu_tube, balloon_tube) =
559 Tube::pair().context("failed to create coiommu tube")?;
560 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800561 let dev = CoIommuDev::new(
562 vm.get_memory().clone(),
563 vfio_container,
564 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800565 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800566 coiommu_attached_endpoints,
567 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +0800568 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800569 )
570 .context("failed to create coiommu device")?;
571
Alexandre Courbot6a8f6562022-03-24 14:43:48 +0900572 devices.push((Box::new(dev), simple_jail(&cfg.jail_config, "coiommu")?));
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800573 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800574 }
575
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800576 let stubs = create_virtio_devices(
577 cfg,
578 vm,
579 resources,
580 exit_evt,
581 wayland_device_tube,
582 gpu_device_tube,
583 vhost_user_gpu_tubes,
584 balloon_device_tube,
585 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +0900586 init_balloon_size,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800587 disk_device_tubes,
588 pmem_device_tubes,
589 map_request,
590 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800591 #[cfg(feature = "gpu")]
592 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800593 vvu_proxy_device_tubes,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800594 )?;
595
596 for stub in stubs {
597 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
598 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
599 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
600 .context("failed to create virtio pci dev")?;
601 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
602 devices.push((dev, stub.jail));
603 }
604
605 #[cfg(feature = "audio")]
606 for ac97_param in &cfg.ac97_parameters {
607 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
608 .context("failed to create ac97 device")?;
Alexandre Courbot6a8f6562022-03-24 14:43:48 +0900609 let jail = simple_jail(&cfg.jail_config, dev.minijail_policy())?;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800610 devices.push((Box::new(dev), jail));
611 }
612
613 #[cfg(feature = "usb")]
Sebastian Ene0440d352022-02-04 12:23:56 +0000614 if cfg.usb {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800615 // Create xhci controller.
616 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
Alexandre Courbot6a8f6562022-03-24 14:43:48 +0900617 devices.push((usb_controller, simple_jail(&cfg.jail_config, "xhci")?));
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800618 }
619
Mattias Nisslerde2c6402021-10-21 12:05:29 +0000620 for params in &cfg.stub_pci_devices {
621 // Stub devices don't need jailing since they don't do anything.
622 devices.push((Box::new(StubPciDevice::new(params)), None));
623 }
624
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +0000625 devices.push((Box::new(PvPanicPciDevice::new(panic_wrtube)), None));
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000626 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -0800627}
628
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000629fn create_file_backed_mappings(
630 cfg: &Config,
631 vm: &mut impl Vm,
632 resources: &mut SystemAllocator,
633) -> Result<()> {
634 for mapping in &cfg.file_backed_mappings {
635 let file = OpenOptions::new()
636 .read(true)
637 .write(mapping.writable)
638 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
639 .open(&mapping.path)
640 .context("failed to open file for file-backed mapping")?;
641 let prot = if mapping.writable {
642 Protection::read_write()
643 } else {
644 Protection::read()
645 };
646 let size = mapping
647 .size
648 .try_into()
649 .context("Invalid size for file-backed mapping")?;
650 let memory_mapping = MemoryMappingBuilder::new(size)
651 .from_file(&file)
652 .offset(mapping.offset)
653 .protection(prot)
654 .build()
655 .context("failed to map backing file for file-backed mapping")?;
656
Daniel Verkampde4d7292022-03-01 15:22:38 -0800657 match resources.mmio_allocator_any().allocate_at(
658 mapping.address,
659 mapping.size,
660 Alloc::FileBacked(mapping.address),
661 "file-backed mapping".to_owned(),
662 ) {
663 // OutOfSpace just means that this mapping is not in the MMIO regions at all, so don't
664 // consider it an error.
665 // TODO(b/222769529): Reserve this region in a global memory address space allocator once
666 // we have that so nothing else can accidentally overlap with it.
667 Ok(()) | Err(resources::Error::OutOfSpace) => {}
668 e => e.context("failed to allocate guest address for file-backed mapping")?,
669 }
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000670
671 vm.add_memory_region(
672 GuestAddress(mapping.address),
673 Box::new(memory_mapping),
674 !mapping.writable,
675 /* log_dirty_pages = */ false,
676 )
677 .context("failed to configure file-backed mapping")?;
678 }
679
680 Ok(())
681}
682
Anton Romanov33334412022-03-22 17:48:18 +0000683#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Xiong Zhangf7874712021-12-24 10:53:59 +0800684fn create_pcie_root_port(
Xiong Zhang626f0142022-03-12 16:05:17 +0800685 host_pcie_rp: Vec<HostPcieRootPortParameters>,
Xiong Zhangf7874712021-12-24 10:53:59 +0800686 sys_allocator: &mut SystemAllocator,
687 control_tubes: &mut Vec<TaggedControlTube>,
688 devices: &mut Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
689 hp_vec: &mut Vec<Arc<Mutex<dyn HotPlugBus>>>,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800690 hp_endpoints_ranges: &mut Vec<RangeInclusive<u32>>,
Xiong Zhang626f0142022-03-12 16:05:17 +0800691 gpe_notify_devs: &mut Vec<(u32, Arc<Mutex<dyn GpeNotify>>)>,
Xiong Zhangf7874712021-12-24 10:53:59 +0800692) -> Result<()> {
693 if host_pcie_rp.is_empty() {
694 // user doesn't specify host pcie root port which link to this virtual pcie rp,
695 // find the empty bus and create a total virtual pcie rp
Haiwei Lie35d4652022-02-10 15:39:33 +0800696 let mut hp_sec_bus = 0u8;
697 // Create Pcie Root Port for non-root buses, each non-root bus device will be
698 // connected behind a virtual pcie root port.
699 for i in 1..255 {
700 if sys_allocator.pci_bus_empty(i) {
701 if hp_sec_bus == 0 {
702 hp_sec_bus = i;
703 }
704 continue;
705 }
706 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(i, false)));
707 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
708 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
709 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
710 // no ipc is used if the root port disables hotplug
711 devices.push((pci_bridge, None));
712 }
713
714 // Create Pcie Root Port for hot-plug
715 if hp_sec_bus == 0 {
716 return Err(anyhow!("no more addresses are available"));
717 }
718 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(hp_sec_bus, true)));
Xiong Zhangf7874712021-12-24 10:53:59 +0800719 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
720 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
721 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
722
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800723 hp_endpoints_ranges.push(RangeInclusive::new(
724 PciAddress {
725 bus: pci_bridge.get_secondary_num(),
726 dev: 0,
727 func: 0,
728 }
729 .to_u32(),
730 PciAddress {
731 bus: pci_bridge.get_subordinate_num(),
732 dev: 32,
733 func: 8,
734 }
735 .to_u32(),
736 ));
737
Xiong Zhangf7874712021-12-24 10:53:59 +0800738 devices.push((pci_bridge, None));
739 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
740 } else {
741 // user specify host pcie root port which link to this virtual pcie rp,
742 // reserve the host pci BDF and create a virtual pcie RP with some attrs same as host
Xiong Zhang626f0142022-03-12 16:05:17 +0800743 for host_pcie in host_pcie_rp.iter() {
Xiong Zhangcdffe492021-12-24 15:13:30 +0800744 let (vm_host_tube, vm_device_tube) = Tube::pair().context("failed to create tube")?;
Xiong Zhang626f0142022-03-12 16:05:17 +0800745 let pcie_host = PcieHostRootPort::new(host_pcie.host_path.as_path(), vm_device_tube)?;
Xiong Zhangd6de3192022-02-16 13:24:06 +0800746 let bus_range = pcie_host.get_bus_range();
747 let mut slot_implemented = true;
748 for i in bus_range.secondary..=bus_range.subordinate {
749 // if this bus is occupied by one vfio-pci device, this vfio-pci device is
750 // connected to a pci bridge on host statically, then it should be connected
751 // to a virtual pci bridge in guest statically, this bridge won't have
752 // hotplug capability and won't use slot.
753 if !sys_allocator.pci_bus_empty(i) {
754 slot_implemented = false;
Haiwei Lie4a9e822022-03-24 14:18:32 +0800755 break;
Xiong Zhangd6de3192022-02-16 13:24:06 +0800756 }
757 }
Xiong Zhang626f0142022-03-12 16:05:17 +0800758
Xiong Zhangd6de3192022-02-16 13:24:06 +0800759 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new_from_host(
760 pcie_host,
761 slot_implemented,
762 )?));
Xiong Zhangcdffe492021-12-24 15:13:30 +0800763 control_tubes.push(TaggedControlTube::Vm(vm_host_tube));
Xiong Zhangf7874712021-12-24 10:53:59 +0800764
765 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
766 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
767 let mut pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
768 // early reservation for host pcie root port devices.
769 let rootport_addr = pci_bridge.allocate_address(sys_allocator);
770 if rootport_addr.is_err() {
771 warn!(
772 "address reservation failed for hot pcie root port {}",
773 pci_bridge.debug_label()
774 );
775 }
776
Haiwei Lie4a9e822022-03-24 14:18:32 +0800777 // Only append the sub pci range of a hot-pluggable root port to virtio-iommu
778 if slot_implemented {
779 hp_endpoints_ranges.push(RangeInclusive::new(
780 PciAddress {
781 bus: pci_bridge.get_secondary_num(),
782 dev: 0,
783 func: 0,
784 }
785 .to_u32(),
786 PciAddress {
787 bus: pci_bridge.get_subordinate_num(),
788 dev: 32,
789 func: 8,
790 }
791 .to_u32(),
792 ));
793 }
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800794
Xiong Zhangf7874712021-12-24 10:53:59 +0800795 devices.push((pci_bridge, None));
Xiong Zhang626f0142022-03-12 16:05:17 +0800796 if slot_implemented {
797 if let Some(gpe) = host_pcie.hp_gpe {
798 gpe_notify_devs
799 .push((gpe, pcie_root_port.clone() as Arc<Mutex<dyn GpeNotify>>));
800 }
801 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
802 }
Xiong Zhangf7874712021-12-24 10:53:59 +0800803 }
804 }
805
806 Ok(())
807}
808
Zach Reiznera90649a2021-03-31 12:56:08 -0700809fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800810 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100811 Some(
Daniel Verkamped6b27a2022-03-25 14:06:05 -0700812 open_file(initrd_path, OpenOptions::new().read(true))
813 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100814 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800815 } else {
816 None
817 };
818
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700819 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100820 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Daniel Verkamped6b27a2022-03-25 14:06:05 -0700821 open_file(kernel_path, OpenOptions::new().read(true)).with_context(|| {
822 format!("failed to open kernel image {}", kernel_path.display())
823 })?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100824 ),
825 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Daniel Verkamped6b27a2022-03-25 14:06:05 -0700826 open_file(bios_path, OpenOptions::new().read(true))
Daniel Verkamp6b298582021-08-16 15:37:11 -0700827 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100828 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700829 _ => panic!("Did not receive a bios or kernel, should be impossible."),
830 };
831
Will Deaconc48e7832021-07-30 19:03:06 +0100832 let swiotlb = if let Some(size) = cfg.swiotlb {
833 Some(
834 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700835 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100836 )
837 } else {
838 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +0000839 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
840 Some(64 * 1024 * 1024)
841 }
Will Deaconc48e7832021-07-30 19:03:06 +0100842 ProtectionType::Unprotected => None,
843 }
844 };
845
Zach Reiznera90649a2021-03-31 12:56:08 -0700846 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -0800847 memory_size: cfg
848 .memory
849 .unwrap_or(256)
850 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700851 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100852 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -0700853 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -0700854 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -0700855 cpu_clusters: cfg.cpu_clusters.clone(),
856 cpu_capacity: cfg.cpu_capacity.clone(),
Dmytro Maluka74031b42022-02-25 18:00:17 +0000857 #[cfg(feature = "direct")]
Dmytro Maluka6cea2c72022-02-25 18:22:17 +0000858 direct_gpe: cfg.direct_gpe.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +0900859 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +0900860 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700861 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800862 android_fstab: cfg
863 .android_fstab
864 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700865 .map(|x| {
866 File::open(x)
867 .with_context(|| format!("failed to open android fstab file {}", x.display()))
868 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800869 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +0900870 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800871 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -0700872 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700873 acpi_sdts: cfg
874 .acpi_tables
875 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700876 .map(|path| {
877 SDT::from_file(path)
878 .with_context(|| format!("failed to open ACPI file {}", path.display()))
879 })
Tomasz Jeznach42644642020-05-20 23:27:59 -0700880 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +0900881 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +0900882 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100883 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900884 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -0700885 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700886 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -0700887 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +0800888 host_cpu_topology: cfg.host_cpu_topology,
Grzegorz Jaszczykd33874e2022-02-11 18:27:29 +0000889 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
890 force_s2idle: cfg.force_s2idle,
Zach Reiznera90649a2021-03-31 12:56:08 -0700891 })
892}
893
/// Outcome reported when a VM run finishes; this is the success value returned by
/// `run_config` and `run_vm`.
///
/// NOTE(review): the per-variant descriptions below are inferred from the variant names only;
/// confirm them against the code that produces each value.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ExitState {
    /// Presumably the VM asked to be reset - TODO confirm.
    Reset,
    /// Presumably the VM stopped normally - TODO confirm.
    Stop,
    /// Presumably the VM crashed - TODO confirm.
    Crash,
    /// Presumably the guest reported a panic - TODO confirm.
    GuestPanic,
}
901
Daniel Verkamp5586ff52022-02-24 16:34:55 -0800902// Remove ranges in `guest_mem_layout` that overlap with ranges in `file_backed_mappings`.
903// Returns the updated guest memory layout.
904fn punch_holes_in_guest_mem_layout_for_mappings(
905 guest_mem_layout: Vec<(GuestAddress, u64)>,
906 file_backed_mappings: &[FileBackedMappingParameters],
907) -> Vec<(GuestAddress, u64)> {
908 // Create a set containing (start, end) pairs with exclusive end (end = start + size; the byte
909 // at end is not included in the range).
910 let mut layout_set = BTreeSet::new();
911 for (addr, size) in &guest_mem_layout {
912 layout_set.insert((addr.offset(), addr.offset() + size));
913 }
914
915 for mapping in file_backed_mappings {
916 let mapping_start = mapping.address;
917 let mapping_end = mapping_start + mapping.size;
918
919 // Repeatedly split overlapping guest memory regions until no overlaps remain.
920 while let Some((range_start, range_end)) = layout_set
921 .iter()
922 .find(|&&(range_start, range_end)| {
923 mapping_start < range_end && mapping_end > range_start
924 })
925 .cloned()
926 {
927 layout_set.remove(&(range_start, range_end));
928
929 if range_start < mapping_start {
930 layout_set.insert((range_start, mapping_start));
931 }
932 if range_end > mapping_end {
933 layout_set.insert((mapping_end, range_end));
934 }
935 }
936 }
937
938 // Build the final guest memory layout from the modified layout_set.
939 layout_set
940 .iter()
941 .map(|(start, end)| (GuestAddress(*start), end - start))
942 .collect()
943}
944
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800945pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -0700946 let components = setup_vm_components(&cfg)?;
947
948 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700949 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
Daniel Verkamp5586ff52022-02-24 16:34:55 -0800950
951 let guest_mem_layout =
952 punch_holes_in_guest_mem_layout_for_mappings(guest_mem_layout, &cfg.file_backed_mappings);
953
Daniel Verkamp6b298582021-08-16 15:37:11 -0700954 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700955 let mut mem_policy = MemoryPolicy::empty();
956 if components.hugepages {
957 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
958 }
Quentin Perret26203802021-12-02 09:48:43 +0000959 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700960 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +0000961 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Junichi Uekawab3a094e2022-03-29 15:41:47 +0900962
963 if !cfg.userspace_msr.is_empty() {
964 vm.enable_userspace_msr()
965 .context("failed to enable userspace MSR handling, do you have kernel 5.10 or later")?;
966 }
967
Andrew Walbrane79aba12022-01-27 14:12:35 +0000968 // Check that the VM was actually created in protected mode as expected.
969 if cfg.protected_vm != ProtectionType::Unprotected && !vm.check_capability(VmCap::Protected) {
970 bail!("Failed to create protected VM");
971 }
Daniel Verkamp6b298582021-08-16 15:37:11 -0700972 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700973
974 enum KvmIrqChip {
975 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
976 Split(KvmSplitIrqChip),
977 Kernel(KvmKernelIrqChip),
978 }
979
980 impl KvmIrqChip {
981 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
982 match self {
983 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
984 KvmIrqChip::Split(i) => i,
985 KvmIrqChip::Kernel(i) => i,
986 }
987 }
988 }
989
990 let ioapic_host_tube;
991 let mut irq_chip = if cfg.split_irqchip {
992 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
993 unimplemented!("KVM split irqchip mode only supported on x86 processors");
994 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
995 {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700996 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700997 ioapic_host_tube = Some(host_tube);
998 KvmIrqChip::Split(
999 KvmSplitIrqChip::new(
1000 vm_clone,
1001 components.vcpu_count,
1002 ioapic_device_tube,
1003 Some(120),
1004 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001005 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07001006 )
1007 }
1008 } else {
1009 ioapic_host_tube = None;
1010 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -07001011 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
1012 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -07001013 )
1014 };
1015
1016 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
1017}
1018
1019fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07001020 cfg: Config,
1021 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07001022 mut vm: V,
1023 irq_chip: &mut dyn IrqChipArch,
1024 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001025) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07001026where
1027 Vcpu: VcpuArch + 'static,
1028 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07001029{
Alexandre Courbot6a8f6562022-03-24 14:43:48 +09001030 if cfg.jail_config.is_some() {
Zach Reiznera90649a2021-03-31 12:56:08 -07001031 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1032 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1033 // access to those files will not be possible.
1034 info!("crosvm entering multiprocess mode");
1035 }
1036
Daniel Verkampf1439d42021-05-21 13:55:10 -07001037 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07001038 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001039 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07001040
Zach Reiznera90649a2021-03-31 12:56:08 -07001041 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1042 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1043 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001044 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07001045
Zach Reiznera60744b2019-02-13 17:33:32 -08001046 let control_server_socket = match &cfg.socket_path {
1047 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07001048 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08001049 )),
1050 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001051 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001052
Zach Reiznera90649a2021-03-31 12:56:08 -07001053 let mut control_tubes = Vec::new();
1054
1055 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1056 if let Some(port) = cfg.gdb {
1057 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001058 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07001059 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
1060 components.gdb = Some((port, gdb_control_tube));
1061 }
1062
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001063 for wl_cfg in &cfg.vhost_user_wl {
1064 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
1065 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001066 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001067 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
1068 }
1069
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001070 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
1071 for _ in 0..cfg.vhost_user_gpu.len() {
Alexandre Courbote55b7912022-03-04 16:54:38 +09001072 let (host_control_tube, device_control_tube) =
1073 Tube::pair().context("failed to create tube")?;
1074 let (host_gpu_tube, device_gpu_tube) = Tube::pair().context("failed to create tube")?;
1075 vhost_user_gpu_tubes.push((host_gpu_tube, device_gpu_tube, device_control_tube));
1076 control_tubes.push(TaggedControlTube::VmMemory(host_control_tube));
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001077 }
1078
Daniel Verkamp6b298582021-08-16 15:37:11 -07001079 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001080 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Andrew Walbran3cd93602022-01-25 13:59:23 +00001081
1082 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
David Stevens8be9ef02022-01-13 22:50:24 +09001083 if let Some(ref path) = cfg.balloon_control {
1084 (
1085 None,
1086 Some(Tube::new(
1087 UnixSeqpacket::connect(path).context("failed to create balloon control")?,
1088 )),
1089 )
1090 } else {
1091 // Balloon gets a special socket so balloon requests can be forwarded
1092 // from the main process.
1093 let (host, device) = Tube::pair().context("failed to create tube")?;
1094 // Set recv timeout to avoid deadlock on sending BalloonControlCommand
1095 // before the guest is ready.
1096 host.set_recv_timeout(Some(Duration::from_millis(100)))
1097 .context("failed to set timeout")?;
1098 (Some(host), Some(device))
1099 }
Andrew Walbran3cd93602022-01-25 13:59:23 +00001100 } else {
1101 (None, None)
1102 };
Dylan Reid059a1882018-07-23 17:58:09 -07001103
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001104 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001105 let mut disk_device_tubes = Vec::new();
1106 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001107 let disk_count = cfg.disks.len();
1108 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001109 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001110 disk_host_tubes.push(disk_host_tub);
1111 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001112 }
1113
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001114 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08001115 let pmem_count = cfg.pmem_devices.len();
1116 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001117 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001118 pmem_device_tubes.push(pmem_device_tube);
1119 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08001120 }
1121
Daniel Verkamp6b298582021-08-16 15:37:11 -07001122 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001123 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001124
Zach Reiznerdc748482021-04-14 13:59:30 -07001125 if let Some(ioapic_host_tube) = ioapic_host_tube {
1126 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
1127 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08001128
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001129 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07001130 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alexandre Courbot6a8f6562022-03-24 14:43:48 +09001131 let jail = match simple_jail(&cfg.jail_config, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07001132 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09001133 Some(mut jail) => {
1134 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
1135 #[cfg(feature = "power-monitor-powerd")]
1136 {
Fergus Dall51200512021-08-19 12:54:26 +10001137 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09001138
1139 // Create a tmpfs in the device's root directory so that we can bind mount files.
1140 jail.mount_with_data(
1141 Path::new("none"),
1142 Path::new("/"),
1143 "tmpfs",
1144 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1145 "size=67108864",
1146 )?;
1147
1148 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
1149 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
1150 }
1151 Some(jail)
1152 }
1153 None => None,
1154 };
1155 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001156 } else {
1157 (&cfg.battery_type, None)
1158 };
1159
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001160 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
1161
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001162 let fs_count = cfg
1163 .shared_dirs
1164 .iter()
1165 .filter(|sd| sd.kind == SharedDirKind::FS)
1166 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001167 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001168 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001169 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001170 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
1171 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001172 }
1173
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001174 let mut vvu_proxy_device_tubes = Vec::new();
1175 for _ in 0..cfg.vvu_proxy.len() {
1176 let (vvu_proxy_host_tube, vvu_proxy_device_tube) =
1177 Tube::pair().context("failed to create VVU proxy tube")?;
1178 control_tubes.push(TaggedControlTube::VmMemory(vvu_proxy_host_tube));
1179 vvu_proxy_device_tubes.push(vvu_proxy_device_tube);
1180 }
1181
Daniel Verkamp6b298582021-08-16 15:37:11 -07001182 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001183 let reset_evt = Event::new().context("failed to create event")?;
Andrew Walbran1a19c672022-01-24 17:24:10 +00001184 let crash_evt = Event::new().context("failed to create event")?;
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001185 let (panic_rdtube, panic_wrtube) = Tube::pair().context("failed to create tube")?;
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001186
David Stevense4db4172022-03-10 13:26:04 +09001187 let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
David Stevensdbd24182022-03-10 10:53:56 +09001188 let mut sys_allocator = SystemAllocator::new(
1189 Arch::get_system_allocator_config(&vm),
1190 pstore_size,
1191 &cfg.mmio_address_ranges,
1192 )
1193 .context("failed to create system allocator")?;
David Stevense4db4172022-03-10 13:26:04 +09001194
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001195 let ramoops_region = match &components.pstore {
1196 Some(pstore) => Some(
David Stevense4db4172022-03-10 13:26:04 +09001197 arch::pstore::create_memory_region(
1198 &mut vm,
1199 sys_allocator.reserved_region().unwrap(),
1200 pstore,
1201 )
1202 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001203 ),
1204 None => None,
1205 };
1206
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00001207 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
1208
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001209 #[cfg(feature = "gpu")]
1210 // Hold on to the render server jail so it keeps running until we exit run_vm()
Dmitry Torokhove464a7a2022-01-26 13:29:36 -08001211 let (_render_server_jail, render_server_fd) =
1212 if let Some(parameters) = &cfg.gpu_render_server_parameters {
1213 let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
1214 (Some(ScopedMinijail(jail)), Some(fd))
1215 } else {
1216 (None, None)
1217 };
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001218
David Stevens06d157a2022-01-13 23:44:48 +09001219 let init_balloon_size = components
1220 .memory_size
1221 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1222 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1223 }))
1224 .context("failed to calculate init balloon size")?;
1225
Tomasz Nowicki64f43552022-02-22 14:14:45 +00001226 #[cfg(feature = "direct")]
1227 let mut irqs = Vec::new();
1228
1229 #[cfg(feature = "direct")]
1230 for irq in &cfg.direct_level_irq {
1231 if !sys_allocator.reserve_irq(*irq) {
1232 warn!("irq {} already reserved.", irq);
1233 }
1234 let trigger = Event::new().context("failed to create event")?;
1235 let resample = Event::new().context("failed to create event")?;
1236 irq_chip
1237 .register_irq_event(*irq, &trigger, Some(&resample))
1238 .unwrap();
1239 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
1240 .context("failed to enable interrupt forwarding")?;
1241 direct_irq
1242 .irq_enable(*irq)
1243 .context("failed to enable interrupt forwarding")?;
1244 irqs.push(direct_irq);
1245 }
1246
1247 #[cfg(feature = "direct")]
1248 for irq in &cfg.direct_edge_irq {
1249 if !sys_allocator.reserve_irq(*irq) {
1250 warn!("irq {} already reserved.", irq);
1251 }
1252 let trigger = Event::new().context("failed to create event")?;
1253 irq_chip.register_irq_event(*irq, &trigger, None).unwrap();
1254 let direct_irq = devices::DirectIrq::new(trigger, None)
1255 .context("failed to enable interrupt forwarding")?;
1256 direct_irq
1257 .irq_enable(*irq)
1258 .context("failed to enable interrupt forwarding")?;
1259 irqs.push(direct_irq);
1260 }
1261
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001262 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> =
1263 BTreeMap::new();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001264 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07001265 &cfg,
1266 &mut vm,
1267 &mut sys_allocator,
1268 &exit_evt,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001269 panic_wrtube,
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001270 &mut iommu_attached_endpoints,
Zach Reiznerdc748482021-04-14 13:59:30 -07001271 &mut control_tubes,
1272 wayland_device_tube,
1273 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001274 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001275 balloon_device_tube,
David Stevens06d157a2022-01-13 23:44:48 +09001276 init_balloon_size,
Zach Reiznerdc748482021-04-14 13:59:30 -07001277 &mut disk_device_tubes,
1278 &mut pmem_device_tubes,
1279 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001280 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07001281 usb_provider,
1282 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001283 #[cfg(feature = "gpu")]
1284 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001285 &mut vvu_proxy_device_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001286 )?;
1287
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001288 let mut hp_endpoints_ranges: Vec<RangeInclusive<u32>> = Vec::new();
Anton Romanov33334412022-03-22 17:48:18 +00001289 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Xiong Zhangf7874712021-12-24 10:53:59 +08001290 let mut hotplug_buses: Vec<Arc<Mutex<dyn HotPlugBus>>> = Vec::new();
Xiong Zhang1b6e0112022-03-15 11:34:16 +08001291 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1292 let mut gpe_notify_devs: Vec<(u32, Arc<Mutex<dyn GpeNotify>>)> = Vec::new();
Xiong Zhangf7874712021-12-24 10:53:59 +08001293 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1294 {
1295 #[cfg(feature = "direct")]
1296 let rp_host = cfg.pcie_rp.clone();
1297 #[cfg(not(feature = "direct"))]
Xiong Zhang626f0142022-03-12 16:05:17 +08001298 let rp_host: Vec<HostPcieRootPortParameters> = Vec::new();
Xiong Zhangf7874712021-12-24 10:53:59 +08001299
1300 // Create Pcie Root Port
1301 create_pcie_root_port(
1302 rp_host,
1303 &mut sys_allocator,
1304 &mut control_tubes,
1305 &mut devices,
1306 &mut hotplug_buses,
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001307 &mut hp_endpoints_ranges,
Xiong Zhang1b6e0112022-03-15 11:34:16 +08001308 &mut gpe_notify_devs,
Xiong Zhangf7874712021-12-24 10:53:59 +08001309 )?;
1310 }
1311
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001312 let (translate_response_senders, request_rx) = setup_virtio_access_platform(
1313 &mut sys_allocator,
1314 &mut iommu_attached_endpoints,
1315 &mut devices,
1316 )?;
1317
Haiwei Li87bc2fc2022-02-18 14:37:40 +08001318 let iommu_host_tube = if !iommu_attached_endpoints.is_empty() || cfg.virtio_iommu {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001319 let (iommu_host_tube, iommu_device_tube) = Tube::pair().context("failed to create tube")?;
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001320 let iommu_dev = create_iommu_device(
1321 &cfg,
1322 (1u64 << vm.get_guest_phys_addr_bits()) - 1,
1323 iommu_attached_endpoints,
1324 hp_endpoints_ranges,
1325 translate_response_senders,
1326 request_rx,
1327 iommu_device_tube,
1328 )?;
1329
1330 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1331 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1332 let mut dev = VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
1333 .context("failed to create virtio pci dev")?;
1334 // early reservation for viommu.
1335 dev.allocate_address(&mut sys_allocator)
1336 .context("failed to allocate resources early for virtio pci dev")?;
1337 let dev = Box::new(dev);
1338 devices.push((dev, iommu_dev.jail));
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001339 Some(iommu_host_tube)
1340 } else {
1341 None
1342 };
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001343
Peter Fangc2bba082021-04-19 18:40:24 -07001344 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001345 for device in devices
1346 .iter_mut()
1347 .filter_map(|(dev, _)| dev.as_pci_device_mut())
1348 {
Peter Fangc2bba082021-04-19 18:40:24 -07001349 let sdts = device
1350 .generate_acpi(components.acpi_sdts)
1351 .or_else(|| {
1352 error!("ACPI table generation error");
1353 None
1354 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001355 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07001356 components.acpi_sdts = sdts;
1357 }
1358
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001359 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
1360 let mut kvm_vcpu_ids = Vec::new();
1361
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08001362 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07001363 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06001364 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07001365 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001366 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07001367 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06001368 &cfg.serial_parameters,
Alexandre Courbot6a8f6562022-03-24 14:43:48 +09001369 simple_jail(&cfg.jail_config, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001370 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07001371 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001372 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001373 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07001374 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001375 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06001376 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001377 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08001378
Daniel Verkamp1286b482021-11-30 15:14:16 -08001379 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1380 {
Xiong Zhangf7874712021-12-24 10:53:59 +08001381 for hotplug_bus in hotplug_buses.iter() {
1382 linux.hotplug_bus.push(hotplug_bus.clone());
1383 }
Xiong Zhang1b6e0112022-03-15 11:34:16 +08001384
1385 if let Some(pm) = &linux.pm {
1386 while let Some((gpe, notify_dev)) = gpe_notify_devs.pop() {
1387 pm.lock().register_gpe_notify_dev(gpe, notify_dev);
1388 }
1389 }
Daniel Verkamp1286b482021-11-30 15:14:16 -08001390 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001391
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001392 #[cfg(feature = "direct")]
1393 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001394 let direct_io = Arc::new(
1395 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
1396 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001397 for range in pmio.ranges.iter() {
1398 linux
1399 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001400 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001401 .unwrap();
1402 }
1403 };
1404
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001405 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001406 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08001407 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001408 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08001409 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001410 );
Xiong Zhang46471a02021-11-12 00:34:42 +08001411
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001412 for range in mmio.ranges.iter() {
1413 linux
1414 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001415 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001416 .unwrap();
1417 }
1418 };
1419
Daniel Verkamp6b298582021-08-16 15:37:11 -07001420 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001421 run_control(
1422 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07001423 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001424 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08001425 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001426 control_tubes,
1427 balloon_host_tube,
1428 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001429 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001430 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001431 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001432 reset_evt,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001433 crash_evt,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001434 panic_rdtube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001435 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001436 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08001437 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001438 kvm_vcpu_ids,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001439 iommu_host_tube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001440 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001441}
1442
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001443fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
1444 linux: &RunnableLinuxVm<V, Vcpu>,
1445 host_addr: PciAddress,
1446) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
1447 for hp_bus in linux.hotplug_bus.iter() {
1448 if let Some(number) = hp_bus.lock().is_match(host_addr) {
1449 return Ok((hp_bus.clone(), number));
1450 }
1451 }
1452 Err(anyhow!("Failed to find a suitable hotplug bus"))
1453}
1454
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001455fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1456 linux: &mut RunnableLinuxVm<V, Vcpu>,
1457 sys_allocator: &mut SystemAllocator,
1458 cfg: &Config,
1459 control_tubes: &mut Vec<TaggedControlTube>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001460 iommu_host_tube: &Option<Tube>,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001461 vfio_path: &Path,
1462) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001463 let host_os_str = vfio_path
1464 .file_name()
1465 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1466 let host_str = host_os_str
1467 .to_str()
1468 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001469 let host_addr =
1470 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001471
1472 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
1473
Woody Chow055b81b2022-01-25 18:34:29 +09001474 let mut endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> = BTreeMap::new();
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001475 let (vfio_pci_device, jail) = create_vfio_device(
1476 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001477 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001478 sys_allocator,
1479 control_tubes,
1480 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001481 Some(bus_num),
Victor Ding3f749592022-03-18 05:44:20 +00001482 None,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001483 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001484 None,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001485 if iommu_host_tube.is_some() {
1486 IommuDevType::VirtioIommu
1487 } else {
1488 IommuDevType::NoIommu
1489 },
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001490 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001491
1492 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001493 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001494
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001495 if let Some(iommu_host_tube) = iommu_host_tube {
1496 let &endpoint_addr = endpoints.iter().next().unwrap().0;
1497 let mapper = endpoints.remove(&endpoint_addr).unwrap();
1498 if let Some(vfio_wrapper) = mapper.lock().as_vfio_wrapper() {
1499 let vfio_container = vfio_wrapper.as_vfio_container();
1500 let descriptor = vfio_container.lock().into_raw_descriptor()?;
1501 let request = VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceAdd {
1502 endpoint_addr,
1503 container: {
1504 // Safe because the descriptor is uniquely owned by `descriptor`.
1505 unsafe { File::from_raw_descriptor(descriptor) }
1506 },
1507 });
1508
1509 match virtio_iommu_request(iommu_host_tube, &request)
1510 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1511 {
1512 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1513 resp => bail!("Unexpected message response: {:?}", resp),
1514 }
1515 };
1516 }
1517
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001518 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001519 let mut hp_bus = hp_bus.lock();
1520 hp_bus.add_hotplug_device(host_key, pci_address);
1521 hp_bus.hot_plug(pci_address);
1522 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001523}
1524
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001525fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1526 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08001527 sys_allocator: &mut SystemAllocator,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001528 iommu_host_tube: &Option<Tube>,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001529 vfio_path: &Path,
1530) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001531 let host_os_str = vfio_path
1532 .file_name()
1533 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1534 let host_str = host_os_str
1535 .to_str()
1536 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001537 let host_addr =
1538 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001539 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001540 for hp_bus in linux.hotplug_bus.iter() {
1541 let mut hp_bus_lock = hp_bus.lock();
1542 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001543 if let Some(iommu_host_tube) = iommu_host_tube {
1544 let request =
1545 VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceDel {
1546 endpoint_addr: pci_addr.to_u32(),
1547 });
1548 match virtio_iommu_request(iommu_host_tube, &request)
1549 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1550 {
1551 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1552 resp => bail!("Unexpected message response: {:?}", resp),
1553 }
1554 }
1555
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001556 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08001557 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001558 return Ok(());
1559 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001560 }
1561
Daniel Verkamp6b298582021-08-16 15:37:11 -07001562 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001563}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001564
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001565fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
1566 linux: &mut RunnableLinuxVm<V, Vcpu>,
1567 sys_allocator: &mut SystemAllocator,
1568 cfg: &Config,
1569 add_tubes: &mut Vec<TaggedControlTube>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001570 iommu_host_tube: &Option<Tube>,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001571 vfio_path: &Path,
1572 add: bool,
1573) -> VmResponse {
1574 let ret = if add {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001575 add_vfio_device(
1576 linux,
1577 sys_allocator,
1578 cfg,
1579 add_tubes,
1580 iommu_host_tube,
1581 vfio_path,
1582 )
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001583 } else {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001584 remove_vfio_device(linux, sys_allocator, iommu_host_tube, vfio_path)
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001585 };
1586
1587 match ret {
1588 Ok(()) => VmResponse::Ok,
1589 Err(e) => {
1590 error!("hanlde_vfio_command failure: {}", e);
1591 add_tubes.clear();
1592 VmResponse::Err(base::Error::new(libc::EINVAL))
1593 }
1594 }
1595}
1596
Zach Reiznerdc748482021-04-14 13:59:30 -07001597fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
1598 mut linux: RunnableLinuxVm<V, Vcpu>,
1599 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001600 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08001601 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001602 mut control_tubes: Vec<TaggedControlTube>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001603 balloon_host_tube: Option<Tube>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001604 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07001605 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001606 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001607 reset_evt: Event,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001608 crash_evt: Event,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001609 panic_rdtube: Tube,
Zach Reizner55a9e502018-10-03 10:22:32 -07001610 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001611 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08001612 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001613 kvm_vcpu_ids: Vec<usize>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001614 iommu_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001615) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001616 #[derive(PollToken)]
1617 enum Token {
1618 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001619 Reset,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001620 Crash,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001621 Panic,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001622 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001623 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001624 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08001625 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001626 VmControl { index: usize },
Tomasz Nowicki98801002022-02-23 21:00:00 +00001627 DelayedIrqFd,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001628 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001629
Zach Reizner19ad1f32019-12-12 18:58:50 -08001630 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001631 .set_raw_mode()
1632 .expect("failed to set terminal raw mode");
1633
Michael Hoylee392c462020-10-07 03:29:24 -07001634 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07001635 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001636 (&reset_evt, Token::Reset),
Andrew Walbran1a19c672022-01-24 17:24:10 +00001637 (&crash_evt, Token::Crash),
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001638 (&panic_rdtube, Token::Panic),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001639 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07001640 (&sigchld_fd, Token::ChildSignal),
1641 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07001642 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07001643
Zach Reiznera60744b2019-02-13 17:33:32 -08001644 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07001645 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001646 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001647 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001648 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001649 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07001650 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07001651 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001652 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001653 }
1654
Steven Richmanf32d0b42020-06-20 21:45:32 -07001655 let events = linux
1656 .irq_chip
1657 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001658 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001659
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001660 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07001661 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001662 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001663 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001664 }
1665
Tomasz Nowicki98801002022-02-23 21:00:00 +00001666 if let Some(delayed_ioapic_irq_trigger) = linux.irq_chip.irq_delayed_event_token()? {
1667 wait_ctx
1668 .add(&delayed_ioapic_irq_trigger, Token::DelayedIrqFd)
1669 .context("failed to add descriptor to wait context")?;
1670 }
1671
Alexandre Courbot6a8f6562022-03-24 14:43:48 +09001672 if cfg.jail_config.is_some() {
Lepton Wu20333e42019-03-14 10:48:03 -07001673 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001674 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07001675 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001676
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001677 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1678 // Create a channel for GDB thread.
1679 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
1680 let (s, r) = mpsc::channel();
1681 (Some(s), Some(r))
1682 } else {
1683 (None, None)
1684 };
1685
Steven Richmanf32d0b42020-06-20 21:45:32 -07001686 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
1687 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07001688 let use_hypervisor_signals = !linux
1689 .vm
1690 .get_hypervisor()
Andrew Walbran985491a2022-01-27 13:47:40 +00001691 .check_capability(HypervisorCap::ImmediateExit);
Anton Romanov5acc0f52022-01-28 00:18:11 +00001692 vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001693
Zach Reizner304e7312020-09-29 16:00:24 -07001694 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00001695 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001696 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
1697 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07001698 // Enable core scheduling before creating vCPUs so that the cookie will be
1699 // shared by all vCPU threads.
1700 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
1701 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001702 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07001703 if let Err(e) = enable_core_scheduling() {
1704 error!("Failed to enable core scheduling: {}", e);
1705 }
1706 }
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001707 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
1708 None => None,
1709 Some(cgroup_path) => {
1710 // Move main process to cgroup_path
1711 let mut f = File::create(&cgroup_path.join("tasks"))?;
1712 f.write_all(process::id().to_string().as_bytes())?;
1713 Some(f)
1714 }
1715 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07001716 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07001717 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07001718 let vcpu_affinity = match linux.vcpu_affinity.clone() {
1719 Some(VcpuAffinity::Global(v)) => v,
1720 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
1721 None => Default::default(),
1722 };
Anton Romanov5acc0f52022-01-28 00:18:11 +00001723 let handle = vcpu::run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001724 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001725 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07001726 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001727 linux.vm.try_clone().context("failed to clone vm")?,
1728 linux
1729 .irq_chip
1730 .try_box_clone()
1731 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001732 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001733 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07001734 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001735 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001736 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07001737 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001738 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07001739 (*linux.io_bus).clone(),
1740 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07001741 exit_evt.try_clone().context("failed to clone event")?,
Andrew Walbranb28ae8e2022-01-17 14:33:10 +00001742 reset_evt.try_clone().context("failed to clone event")?,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001743 crash_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001744 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07001745 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001746 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001747 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1748 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001749 cfg.per_vm_core_scheduling,
1750 cfg.host_cpu_topology,
Zide Chen344e2432022-01-28 14:58:53 -08001751 cfg.privileged_vm,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001752 match vcpu_cgroup_tasks_file {
1753 None => None,
1754 Some(ref f) => Some(
1755 f.try_clone()
1756 .context("failed to clone vcpu cgroup tasks file")?,
1757 ),
1758 },
Junichi Uekawab3a094e2022-03-29 15:41:47 +09001759 cfg.userspace_msr.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001760 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07001761 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07001762 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001763
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001764 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1765 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001766 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001767 let to_vcpu_channels = vcpu_handles
1768 .iter()
1769 .map(|(_handle, channel)| channel.clone())
1770 .collect();
1771 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001772 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001773 to_vcpu_channels,
1774 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
1775 );
1776 thread::Builder::new()
1777 .name("gdb".to_owned())
1778 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001779 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001780 };
1781
Dylan Reid059a1882018-07-23 17:58:09 -07001782 vcpu_thread_barrier.wait();
1783
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001784 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09001785 let mut balloon_stats_id: u64 = 0;
1786
Michael Hoylee392c462020-10-07 03:29:24 -07001787 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001788 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07001789 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001790 Ok(v) => v,
1791 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001792 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001793 break;
1794 }
1795 }
1796 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001797
1798 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07001799 for event in events.iter().filter(|e| e.is_readable) {
1800 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001801 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001802 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07001803 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001804 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001805 Token::Reset => {
1806 info!("vcpu requested reset");
1807 exit_state = ExitState::Reset;
1808 break 'wait;
1809 }
Andrew Walbran1a19c672022-01-24 17:24:10 +00001810 Token::Crash => {
1811 info!("vcpu crashed");
1812 exit_state = ExitState::Crash;
1813 break 'wait;
1814 }
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001815 Token::Panic => {
1816 let mut break_to_wait: bool = true;
1817 match panic_rdtube.recv::<u8>() {
1818 Ok(panic_code) => {
1819 let panic_code = PvPanicCode::from_u8(panic_code);
1820 info!("Guest reported panic [Code: {}]", panic_code);
1821 if panic_code == PvPanicCode::CrashLoaded {
1822 // VM is booting to crash kernel.
1823 break_to_wait = false;
1824 }
1825 }
1826 Err(e) => {
1827 warn!("failed to recv panic event: {} ", e);
1828 }
1829 }
1830 if break_to_wait {
1831 exit_state = ExitState::GuestPanic;
1832 break 'wait;
1833 }
1834 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001835 Token::Suspend => {
1836 info!("VM requested suspend");
1837 linux.suspend_evt.read().unwrap();
Anton Romanov5acc0f52022-01-28 00:18:11 +00001838 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001839 &vcpu_handles,
1840 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001841 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07001842 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001843 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001844 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001845 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001846 while let Some(siginfo) =
1847 sigchld_fd.read().context("failed to create signalfd")?
1848 {
Zach Reizner3ba00982019-01-23 19:04:43 -08001849 let pid = siginfo.ssi_pid;
1850 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1851 Some(label) => format!("{} (pid {})", label, pid),
1852 None => format!("pid {}", pid),
1853 };
David Tolnayf5032762018-12-03 10:46:45 -08001854 error!(
1855 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001856 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001857 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001858 }
Michael Hoylee392c462020-10-07 03:29:24 -07001859 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001860 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001861 Token::IrqFd { index } => {
1862 if let Err(e) = linux.irq_chip.service_irq_event(index) {
1863 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001864 }
1865 }
Tomasz Nowicki98801002022-02-23 21:00:00 +00001866 Token::DelayedIrqFd => {
1867 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
1868 warn!("can't deliver delayed irqs: {}", e);
1869 }
1870 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001871 Token::VmControlServer => {
1872 if let Some(socket_server) = &control_server_socket {
1873 match socket_server.accept() {
1874 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07001875 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001876 .add(
1877 &socket,
1878 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001879 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08001880 },
1881 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001882 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001883 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001884 }
1885 Err(e) => error!("failed to accept socket: {}", e),
1886 }
1887 }
1888 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001889 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001890 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001891 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001892 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001893 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001894 Ok(request) => {
1895 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001896 let response = match request {
1897 VmRequest::VfioCommand { vfio_path, add } => {
1898 handle_vfio_command(
1899 &mut linux,
1900 &mut sys_allocator,
1901 &cfg,
1902 &mut add_tubes,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001903 &iommu_host_tube,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001904 &vfio_path,
1905 add,
1906 )
1907 }
1908 _ => request.execute(
1909 &mut run_mode_opt,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001910 balloon_host_tube.as_ref(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001911 &mut balloon_stats_id,
1912 disk_host_tubes,
Peter Fang6ca03232021-12-20 02:17:21 -08001913 &mut linux.pm,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001914 #[cfg(feature = "usb")]
1915 Some(&usb_control_tube),
1916 #[cfg(not(feature = "usb"))]
1917 None,
1918 &mut linux.bat_control,
1919 &vcpu_handles,
1920 ),
1921 };
1922
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001923 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001924 error!("failed to send VmResponse: {}", e);
1925 }
1926 if let Some(run_mode) = run_mode_opt {
1927 info!("control socket changed run mode to {}", run_mode);
1928 match run_mode {
1929 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07001930 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001931 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001932 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08001933 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07001934 for dev in &linux.resume_notify_devices {
1935 dev.lock().resume_imminent();
1936 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001937 }
Anton Romanov5acc0f52022-01-28 00:18:11 +00001938 vcpu::kick_all_vcpus(
Steven Richman11dc6712020-09-02 15:39:14 -07001939 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07001940 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001941 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07001942 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001943 }
1944 }
1945 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001946 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001947 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001948 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001949 vm_control_indices_to_remove.push(index);
1950 } else {
1951 error!("failed to recv VmRequest: {}", e);
1952 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001953 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001954 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001955 TaggedControlTube::VmMemory(tube) => {
1956 match tube.recv::<VmMemoryRequest>() {
1957 Ok(request) => {
1958 let response = request.execute(
1959 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07001960 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001961 Arc::clone(&map_request),
1962 &mut gralloc,
1963 );
1964 if let Err(e) = tube.send(&response) {
1965 error!("failed to send VmMemoryControlResponse: {}", e);
1966 }
1967 }
1968 Err(e) => {
1969 if let TubeError::Disconnected = e {
1970 vm_control_indices_to_remove.push(index);
1971 } else {
1972 error!("failed to recv VmMemoryControlRequest: {}", e);
1973 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001974 }
1975 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001976 }
1977 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08001978 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001979 let response = {
1980 let irq_chip = &mut linux.irq_chip;
1981 request.execute(
1982 |setup| match setup {
Vikram Auradkar0953c582022-03-21 17:33:54 -07001983 IrqSetup::Event(irq, ev, _, _, _) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001984 if let Some(event_index) = irq_chip
1985 .register_irq_event(irq, ev, None)?
1986 {
1987 match wait_ctx.add(
1988 ev,
1989 Token::IrqFd {
1990 index: event_index
1991 },
1992 ) {
1993 Err(e) => {
1994 warn!("failed to add IrqFd to poll context: {}", e);
1995 Err(e)
1996 },
1997 Ok(_) => {
1998 Ok(())
1999 }
2000 }
2001 } else {
2002 Ok(())
2003 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002004 }
2005 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08002006 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002007 },
Zach Reiznerdc748482021-04-14 13:59:30 -07002008 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002009 )
2010 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002011 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08002012 error!("failed to send VmIrqResponse: {}", e);
2013 }
2014 }
2015 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002016 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002017 vm_control_indices_to_remove.push(index);
2018 } else {
2019 error!("failed to recv VmIrqRequest: {}", e);
2020 }
2021 }
2022 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002023 TaggedControlTube::VmMsync(tube) => {
2024 match tube.recv::<VmMsyncRequest>() {
2025 Ok(request) => {
2026 let response = request.execute(&mut linux.vm);
2027 if let Err(e) = tube.send(&response) {
2028 error!("failed to send VmMsyncResponse: {}", e);
2029 }
2030 }
2031 Err(e) => {
2032 if let TubeError::Disconnected = e {
2033 vm_control_indices_to_remove.push(index);
2034 } else {
2035 error!("failed to recv VmMsyncRequest: {}", e);
2036 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08002037 }
2038 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002039 }
2040 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002041 Ok(request) => {
2042 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07002043 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002044 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002045 error!("failed to send VmResponse: {}", e);
2046 }
2047 }
2048 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002049 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002050 vm_control_indices_to_remove.push(index);
2051 } else {
2052 error!("failed to recv VmResponse: {}", e);
2053 }
2054 }
2055 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002056 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002057 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08002058 if !add_tubes.is_empty() {
2059 for (idx, socket) in add_tubes.iter().enumerate() {
2060 wait_ctx
2061 .add(
2062 socket.as_ref(),
2063 Token::VmControl {
2064 index: idx + control_tubes.len(),
2065 },
2066 )
2067 .context(
2068 "failed to add hotplug vfio-pci descriptor ot wait context",
2069 )?;
2070 }
2071 control_tubes.append(&mut add_tubes);
2072 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002073 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002074 }
2075 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002076
Vikram Auradkarede68c72021-07-01 14:33:54 -07002077 // It's possible more data is readable and buffered while the socket is hungup,
2078 // so don't delete the tube from the poll context until we're sure all the
2079 // data is read.
2080 // Below case covers a condition where we have received a hungup event and the tube is not
2081 // readable.
2082 // In case of readable tube, once all data is read, any attempt to read more data on hungup
2083 // tube should fail. On such failure, we get Disconnected error and index gets added to
2084 // vm_control_indices_to_remove by the time we reach here.
2085 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
2086 if let Token::VmControl { index } = event.token {
2087 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002088 }
2089 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002090
2091 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002092 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002093 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002094 vm_control_indices_to_remove.dedup();
2095 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07002096 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2097 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002098 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07002099 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08002100 // that has already been closed. Because the token associated with that spurious event
2101 // now belongs to a different socket, the control loop will start to interact with
2102 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2103 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002104 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002105 wait_ctx
2106 .delete(socket)
2107 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08002108 }
2109
2110 // This line implicitly drops the socket at `index` when it gets returned by
2111 // `swap_remove`. After this line, the socket at `index` is not the one from
2112 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07002113 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002114 control_tubes.swap_remove(index);
2115 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002116 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002117 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002118 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002119 }
2120 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002121 }
2122
Anton Romanov5acc0f52022-01-28 00:18:11 +00002123 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07002124 &vcpu_handles,
2125 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002126 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07002127 );
Steven Richman11dc6712020-09-02 15:39:14 -07002128 for (handle, _) in vcpu_handles {
2129 if let Err(e) = handle.join() {
2130 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002131 }
2132 }
2133
Daniel Verkamp94c35272019-09-12 13:31:30 -07002134 // Explicitly drop the VM structure here to allow the devices to clean up before the
2135 // control sockets are closed when this function exits.
2136 mem::drop(linux);
2137
Zach Reizner19ad1f32019-12-12 18:58:50 -08002138 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002139 .set_canon_mode()
2140 .expect("failed to restore canonical mode for terminal");
2141
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002142 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002143}
Daniel Verkamp5586ff52022-02-24 16:34:55 -08002144
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // Build a `FileBackedMappingParameters` covering `size` bytes at guest address `address`.
    // Everything else is left at a neutral value: empty path, zero offset, not writable, no sync.
    fn test_file_backed_mapping(address: u64, size: u64) -> FileBackedMappingParameters {
        let path = PathBuf::new();
        FileBackedMappingParameters {
            address,
            size,
            path,
            offset: 0,
            writable: false,
            sync: false,
        }
    }

    // Checks how `punch_holes_in_guest_mem_layout_for_mappings` carves file-backed mappings out
    // of a guest memory layout. Every case starts from the same two regions:
    //   * low:  0xD000_0000 bytes starting at address 0
    //   * high: 0x8_0000 bytes starting at address 0x1_0000_0000
    #[test]
    fn guest_mem_file_backed_mappings_overlap() {
        // Run the function under test on a fresh copy of the two-region layout.
        let punch = |mappings: &[FileBackedMappingParameters]| {
            let layout = vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ];
            punch_holes_in_guest_mem_layout_for_mappings(layout, mappings)
        };
        // Shorthand for the common case of exactly one file-backed mapping.
        let punch_one =
            |address: u64, size: u64| punch(&[test_file_backed_mapping(address, size)]);

        // With no mappings at all, the layout must come back unchanged.
        assert_eq!(
            punch(&[]),
            vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ]
        );

        // A mapping that starts exactly where the low region ends touches no guest memory.
        assert_eq!(
            punch_one(0xD000_0000, 0x1000),
            vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ]
        );

        // Low region, mapping at its very beginning: the region's start moves up.
        assert_eq!(
            punch_one(0, 0x2000),
            vec![
                (GuestAddress(0x2000), 0xD000_0000 - 0x2000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ]
        );

        // Low region, mapping at its very end: the region is shortened.
        assert_eq!(
            punch_one(0xD000_0000 - 0x2000, 0x2000),
            vec![
                (GuestAddress(0), 0xD000_0000 - 0x2000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ]
        );

        // Low region, mapping strictly inside: the region is split in two around the hole.
        assert_eq!(
            punch_one(0x1000, 0x2000),
            vec![
                (GuestAddress(0), 0x1000),
                (GuestAddress(0x3000), 0xD000_0000 - 0x3000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ]
        );

        // High region, mapping at its very beginning: the region's start moves up.
        assert_eq!(
            punch_one(0x1_0000_0000, 0x2000),
            vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_2000), 0x8_0000 - 0x2000),
            ]
        );

        // High region, mapping at its very end: the region is shortened.
        assert_eq!(
            punch_one(0x1_0008_0000 - 0x2000, 0x2000),
            vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_0000), 0x8_0000 - 0x2000),
            ]
        );

        // High region, mapping strictly inside: the region is split in two around the hole.
        assert_eq!(
            punch_one(0x1_0000_1000, 0x2000),
            vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_0000), 0x1000),
                (GuestAddress(0x1_0000_3000), 0x8_0000 - 0x3000),
            ]
        );

        // One mapping spanning the tail of the low region, the gap between the regions, and the
        // head of the high region: both regions are trimmed.
        assert_eq!(
            punch_one(0xA000_0000, 0x6000_2000),
            vec![
                (GuestAddress(0), 0xA000_0000),
                (GuestAddress(0x1_0000_2000), 0x8_0000 - 0x2000),
            ]
        );
    }
}