blob: b8535d02f3920831647531df3845ee3968d445c2 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Daniel Verkamp5586ff52022-02-24 16:34:55 -08006use std::collections::{BTreeMap, BTreeSet};
Anton Romanov5acc0f52022-01-28 00:18:11 +00007use std::convert::TryInto;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::fs::{File, OpenOptions};
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00009use std::io::prelude::*;
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090010use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070011use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070012use std::mem;
Haiwei Li09b7b8e2022-02-18 18:16:05 +080013use std::ops::RangeInclusive;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000014#[cfg(feature = "gpu")]
15use std::os::unix::net::UnixStream;
16use std::os::unix::prelude::OpenOptionsExt;
Xiong Zhangf7874712021-12-24 10:53:59 +080017use std::path::{Path, PathBuf};
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000021use std::process;
Anton Romanov5acc0f52022-01-28 00:18:11 +000022#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reizner39aa26b2017-12-12 18:03:23 -080023use std::thread;
Zach Reizner39aa26b2017-12-12 18:03:23 -080024
Anton Romanov5acc0f52022-01-28 00:18:11 +000025use libc;
Zach Reizner39aa26b2017-12-12 18:03:23 -080026
Tomasz Jeznach42644642020-05-20 23:27:59 -070027use acpi_tables::sdt::SDT;
28
Daniel Verkamp6b298582021-08-16 15:37:11 -070029use anyhow::{anyhow, bail, Context, Result};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Daniel Verkamp578e7cc2022-03-01 22:34:52 -080031use base::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Anton Romanov5acc0f52022-01-28 00:18:11 +000032use devices::serial_device::SerialHardware;
Zide Chenafdb9382021-06-17 12:04:43 -070033use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow055b81b2022-01-25 18:34:29 +090034use devices::virtio::memory_mapper::MemoryMapperTrait;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000035#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000036use devices::virtio::{self, EventDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080037#[cfg(feature = "audio")]
38use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080039use devices::{
Anton Romanov5acc0f52022-01-28 00:18:11 +000040 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqEventIndex, KvmKernelIrqChip, PciAddress,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +000041 PciBridge, PciDevice, PcieHostRootPort, PcieRootPort, PvPanicCode, PvPanicPciDevice,
Woody Chow055b81b2022-01-25 18:34:29 +090042 StubPciDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080043};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080044use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070045#[cfg(feature = "usb")]
46use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070047use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Anton Romanov5acc0f52022-01-28 00:18:11 +000048use hypervisor::{HypervisorCap, ProtectionType, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070049use minijail::{self, Minijail};
Anton Romanov5acc0f52022-01-28 00:18:11 +000050use resources::{Alloc, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080051use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070052use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080053use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090054use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080055
Keiichi Watanabec5262e92020-10-21 15:57:33 +090056#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
57use crate::gdb::{gdb_thread, GdbStub};
Daniel Verkamp5586ff52022-02-24 16:34:55 -080058use crate::{Config, Executable, FileBackedMappingParameters, SharedDir, SharedDirKind, VfioType};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070059use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090060 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070061};
Sonny Raoed517d12018-02-13 22:09:43 -080062
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080063#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070064use {
65 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070066 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070067 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
68};
Zach Reizner55a9e502018-10-03 10:22:32 -070069#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070070use {
Steven Richman11dc6712020-09-02 15:39:14 -070071 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
72 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070073 x86_64::X8664arch as Arch,
74};
Zach Reizner39aa26b2017-12-12 18:03:23 -080075
Anton Romanov5acc0f52022-01-28 00:18:11 +000076mod device_helpers;
77use device_helpers::*;
78mod jail_helpers;
79use jail_helpers::*;
80mod vcpu;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090081
David Tolnay2b089fc2019-03-04 15:33:22 -080082#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000083mod gpu;
Chirantan Ekbote44292f52021-06-25 18:31:41 +090084#[cfg(feature = "gpu")]
Dmitry Torokhove464a7a2022-01-26 13:29:36 -080085pub use gpu::GpuRenderServerParameters;
86#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000087use gpu::*;
Jorge E. Moreirad4562d02021-06-28 16:21:12 -070088
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080089// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -070090#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -080091fn create_virtio_devices(
92 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -070093 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -070094 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -070095 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080096 wayland_device_tube: Tube,
97 gpu_device_tube: Tube,
Alexandre Courbote55b7912022-03-04 16:54:38 +090098 vhost_user_gpu_tubes: Vec<(Tube, Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +000099 balloon_device_tube: Option<Tube>,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800100 balloon_inflate_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900101 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800102 disk_device_tubes: &mut Vec<Tube>,
103 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800104 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800105 fs_device_tubes: &mut Vec<Tube>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800106 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800107 vvu_proxy_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800108) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700109 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800110
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900111 #[cfg(feature = "gpu")]
Alexandre Courbote55b7912022-03-04 16:54:38 +0900112 for (opt, (host_gpu_tube, device_gpu_tube, device_control_tube)) in
113 cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes)
114 {
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900115 devs.push(create_vhost_user_gpu_device(
116 cfg,
117 opt,
Alexandre Courbote55b7912022-03-04 16:54:38 +0900118 (host_gpu_tube, device_gpu_tube),
119 device_control_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900120 )?);
121 }
122
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700123 for opt in &cfg.vvu_proxy {
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800124 devs.push(create_vvu_proxy_device(
125 cfg,
126 opt,
127 vvu_proxy_device_tubes.remove(0),
128 )?);
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700129 }
130
David Tolnayfa701712019-02-13 16:42:54 -0800131 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800132 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900133
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900134 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900135 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800136 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900137
138 #[cfg(feature = "gpu")]
139 {
Jason Macnakcc7070b2019-11-06 14:48:12 -0800140 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700141 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900142 resource_bridges.push(gpu_socket);
143 wl_resource_bridge = Some(wl_socket);
144 }
145 }
146
147 devs.push(create_wayland_device(
148 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800149 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900150 wl_resource_bridge,
151 )?);
152 }
David Tolnayfa701712019-02-13 16:42:54 -0800153
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900154 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900155 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700156 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700157 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900158 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700159 } else {
160 None
161 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900162
163 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900164 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700165 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700166 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900167 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700168 } else {
169 None
170 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900171
Zach Reizner3a8100a2017-09-13 19:15:43 -0700172 #[cfg(feature = "gpu")]
173 {
Noah Golddc7f52b2020-02-01 13:01:58 -0800174 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Anton Romanov5acc0f52022-01-28 00:18:11 +0000175 let mut gpu_display_w = virtio::DEFAULT_DISPLAY_WIDTH;
176 let mut gpu_display_h = virtio::DEFAULT_DISPLAY_HEIGHT;
Jason Macnakd659a0d2021-03-15 15:33:01 -0700177 if !gpu_parameters.displays.is_empty() {
178 gpu_display_w = gpu_parameters.displays[0].width;
179 gpu_display_h = gpu_parameters.displays[0].height;
180 }
181
Zach Reizner65b98f12019-11-22 17:34:58 -0800182 let mut event_devices = Vec::new();
183 if cfg.display_window_mouse {
184 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700185 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000186 let (multi_touch_width, multi_touch_height) = cfg
187 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700188 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800189 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000190 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -0700191 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000192 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700193 // u32::MAX is the least likely to collide with the indices generated above for
194 // the multi_touch options, which begin at 0.
195 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800196 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000197 multi_touch_width,
198 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700199 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800200 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700201 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800202 devs.push(VirtioDeviceStub {
203 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700204 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800205 });
206 event_devices.push(EventDevice::touchscreen(event_device_socket));
207 }
208 if cfg.display_window_keyboard {
209 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700210 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -0700211 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700212 // u32::MAX is the least likely to collide with the indices generated above for
213 // the multi_touch options, which begin at 0.
214 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700215 virtio_dev_socket,
216 virtio::base_features(cfg.protected_vm),
217 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700218 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800219 devs.push(VirtioDeviceStub {
220 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700221 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800222 });
223 event_devices.push(EventDevice::keyboard(event_device_socket));
224 }
Chia-I Wu16fb6592021-11-10 11:45:32 -0800225
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700226 devs.push(create_gpu_device(
227 cfg,
228 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800229 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700230 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900231 // Use the unnamed socket for GPU display screens.
232 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700233 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -0800234 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -0800235 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800236 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700237 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -0700238 }
239 }
240
Richard Fung08289b12022-02-02 20:46:19 +0000241 for (_, param) in cfg
242 .serial_parameters
243 .iter()
244 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
245 {
246 let dev = create_console_device(cfg, param)?;
247 devs.push(dev);
248 }
249
250 for disk in &cfg.disks {
251 let disk_device_tube = disk_device_tubes.remove(0);
252 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
253 }
254
255 for blk in &cfg.vhost_user_blk {
256 devs.push(create_vhost_user_block_device(cfg, blk)?);
257 }
258
259 for console in &cfg.vhost_user_console {
260 devs.push(create_vhost_user_console_device(cfg, console)?);
261 }
262
263 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
264 let pmem_device_tube = pmem_device_tubes.remove(0);
265 devs.push(create_pmem_device(
266 cfg,
267 vm,
268 resources,
269 pmem_disk,
270 index,
271 pmem_device_tube,
272 )?);
273 }
274
Andrew Walbrana24a7522022-02-09 18:23:00 +0000275 if cfg.rng {
276 devs.push(create_rng_device(cfg)?);
277 }
Richard Fung08289b12022-02-02 20:46:19 +0000278
279 #[cfg(feature = "tpm")]
280 {
281 if cfg.software_tpm {
282 devs.push(create_tpm_device(cfg)?);
283 }
284 }
285
286 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
287 devs.push(create_single_touch_device(
288 cfg,
289 single_touch_spec,
290 idx as u32,
291 )?);
292 }
293
294 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
295 devs.push(create_multi_touch_device(
296 cfg,
297 multi_touch_spec,
298 idx as u32,
299 )?);
300 }
301
302 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
303 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
304 }
305
306 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
307 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
308 }
309
310 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
311 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
312 }
313
314 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
315 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
316 }
317
318 for dev_path in &cfg.virtio_input_evdevs {
319 devs.push(create_vinput_device(cfg, dev_path)?);
320 }
321
322 if let Some(balloon_device_tube) = balloon_device_tube {
323 devs.push(create_balloon_device(
324 cfg,
325 balloon_device_tube,
326 balloon_inflate_tube,
327 init_balloon_size,
328 )?);
329 }
330
331 // We checked above that if the IP is defined, then the netmask is, too.
332 for tap_fd in &cfg.tap_fd {
333 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
334 }
335
336 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
337 (cfg.host_ip, cfg.netmask, cfg.mac_address)
338 {
339 if !cfg.vhost_user_net.is_empty() {
340 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
341 }
342 devs.push(create_net_device_from_config(
343 cfg,
344 host_ip,
345 netmask,
346 mac_address,
347 )?);
348 }
349
350 for tap_name in &cfg.tap_name {
351 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
352 }
353
354 for net in &cfg.vhost_user_net {
355 devs.push(create_vhost_user_net_device(cfg, net)?);
356 }
357
358 for vsock in &cfg.vhost_user_vsock {
359 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
360 }
361
362 for opt in &cfg.vhost_user_wl {
363 devs.push(create_vhost_user_wl_device(cfg, opt)?);
364 }
365
Chih-Yang Hsiae31731c2022-01-05 17:30:28 +0800366 #[cfg(feature = "audio_cras")]
367 {
368 for cras_snd in &cfg.cras_snds {
369 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
370 }
371 }
372
Daniel Verkampffb59122021-03-18 14:06:15 -0700373 #[cfg(feature = "video-decoder")]
374 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900375 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700376 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900377 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700378 &mut devs,
379 video_dec_tube,
380 cfg,
381 devices::virtio::VideoDeviceType::Decoder,
382 )?;
383 }
384 }
385
386 #[cfg(feature = "video-encoder")]
387 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900388 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700389 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900390 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700391 &mut devs,
392 video_enc_tube,
393 cfg,
394 devices::virtio::VideoDeviceType::Encoder,
395 )?;
396 }
397 }
398
Zach Reizneraa575662018-08-15 10:46:32 -0700399 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +0900400 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700401 }
402
Woody Chow5890b702021-02-12 14:57:02 +0900403 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700404 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +0900405 }
406
Woody Chow1b16db12021-04-02 16:59:59 +0900407 #[cfg(feature = "audio")]
408 for vhost_user_snd in &cfg.vhost_user_snd {
409 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
410 }
411
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900412 for shared_dir in &cfg.shared_dirs {
413 let SharedDir {
414 src,
415 tag,
416 kind,
417 uid_map,
418 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900419 fs_cfg,
420 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900421 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -0800422
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900423 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900424 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800425 let device_tube = fs_device_tubes.remove(0);
426 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900427 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900428 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900429 };
430 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -0800431 }
432
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900433 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
434 devs.push(create_vhost_user_mac80211_hwsim_device(
435 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700436 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900437 )?);
438 }
439
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700440 #[cfg(feature = "audio")]
441 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700442 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700443 }
444
David Tolnay2b089fc2019-03-04 15:33:22 -0800445 Ok(devs)
446}
447
448fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -0600449 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -0700450 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -0700451 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -0700452 exit_evt: &Event,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +0000453 panic_wrtube: Tube,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800454 iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800455 control_tubes: &mut Vec<TaggedControlTube>,
456 wayland_device_tube: Tube,
457 gpu_device_tube: Tube,
Alexandre Courbote55b7912022-03-04 16:54:38 +0900458 // Tuple content: (host-side GPU tube, device-side GPU tube, device-side control tube).
459 vhost_user_gpu_tubes: Vec<(Tube, Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000460 balloon_device_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900461 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800462 disk_device_tubes: &mut Vec<Tube>,
463 pmem_device_tubes: &mut Vec<Tube>,
464 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -0700465 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800466 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800467 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800468 vvu_proxy_device_tubes: &mut Vec<Tube>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000469) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800470 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
471 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -0700472 if !cfg.vfio.is_empty() {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800473 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -0800474
Tomasz Nowicki71aca792021-06-09 18:53:49 +0000475 for vfio_dev in cfg
476 .vfio
477 .iter()
478 .filter(|dev| dev.get_type() == VfioType::Pci)
479 {
480 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -0700481 let (vfio_pci_device, jail) = create_vfio_device(
482 cfg,
483 vm,
484 resources,
485 control_tubes,
486 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +0800487 None,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800488 iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800489 Some(&mut coiommu_attached_endpoints),
490 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -0700491 )?;
Zide Chendfc4b882021-03-10 16:35:37 -0800492
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000493 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -0700494 }
Zide Chendfc4b882021-03-10 16:35:37 -0800495
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000496 for vfio_dev in cfg
497 .vfio
498 .iter()
499 .filter(|dev| dev.get_type() == VfioType::Platform)
500 {
501 let vfio_path = &vfio_dev.vfio_path;
502 let (vfio_plat_dev, jail) = create_vfio_platform_device(
503 cfg,
504 vm,
505 resources,
506 control_tubes,
507 vfio_path.as_path(),
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800508 iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800509 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000510 )?;
511
512 devices.push((Box::new(vfio_plat_dev), jail));
513 }
514
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +0800515 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
516 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
517 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
518 if res == 0 {
519 let limit = unsafe { buf.assume_init() };
520 let rlim_new = limit
521 .rlim_cur
522 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
523 let rlim_max = max(limit.rlim_max, rlim_new);
524 if limit.rlim_cur < rlim_new {
525 let limit_arg = libc::rlimit {
526 rlim_cur: rlim_new as libc::rlim_t,
527 rlim_max: rlim_max as libc::rlim_t,
528 };
529 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
530 if res != 0 {
531 bail!("Set rlimit failed");
532 }
533 }
534 } else {
535 bail!("Get rlimit failed");
536 }
537 }
538
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800539 if !coiommu_attached_endpoints.is_empty() {
540 let vfio_container =
541 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
542 .context("failed to get vfio container")?;
543 let (coiommu_host_tube, coiommu_device_tube) =
544 Tube::pair().context("failed to create coiommu tube")?;
545 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
546 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800547 let (coiommu_tube, balloon_tube) =
548 Tube::pair().context("failed to create coiommu tube")?;
549 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800550 let dev = CoIommuDev::new(
551 vm.get_memory().clone(),
552 vfio_container,
553 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800554 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800555 coiommu_attached_endpoints,
556 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +0800557 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800558 )
559 .context("failed to create coiommu device")?;
560
561 devices.push((Box::new(dev), simple_jail(cfg, "coiommu")?));
562 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800563 }
564
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800565 let stubs = create_virtio_devices(
566 cfg,
567 vm,
568 resources,
569 exit_evt,
570 wayland_device_tube,
571 gpu_device_tube,
572 vhost_user_gpu_tubes,
573 balloon_device_tube,
574 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +0900575 init_balloon_size,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800576 disk_device_tubes,
577 pmem_device_tubes,
578 map_request,
579 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800580 #[cfg(feature = "gpu")]
581 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800582 vvu_proxy_device_tubes,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800583 )?;
584
585 for stub in stubs {
586 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
587 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
588 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
589 .context("failed to create virtio pci dev")?;
590 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
591 devices.push((dev, stub.jail));
592 }
593
594 #[cfg(feature = "audio")]
595 for ac97_param in &cfg.ac97_parameters {
596 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
597 .context("failed to create ac97 device")?;
598 let jail = simple_jail(cfg, dev.minijail_policy())?;
599 devices.push((Box::new(dev), jail));
600 }
601
602 #[cfg(feature = "usb")]
Sebastian Ene0440d352022-02-04 12:23:56 +0000603 if cfg.usb {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800604 // Create xhci controller.
605 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
606 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
607 }
608
Mattias Nisslerde2c6402021-10-21 12:05:29 +0000609 for params in &cfg.stub_pci_devices {
610 // Stub devices don't need jailing since they don't do anything.
611 devices.push((Box::new(StubPciDevice::new(params)), None));
612 }
613
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +0000614 devices.push((Box::new(PvPanicPciDevice::new(panic_wrtube)), None));
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000615 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -0800616}
617
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000618fn create_file_backed_mappings(
619 cfg: &Config,
620 vm: &mut impl Vm,
621 resources: &mut SystemAllocator,
622) -> Result<()> {
623 for mapping in &cfg.file_backed_mappings {
624 let file = OpenOptions::new()
625 .read(true)
626 .write(mapping.writable)
627 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
628 .open(&mapping.path)
629 .context("failed to open file for file-backed mapping")?;
630 let prot = if mapping.writable {
631 Protection::read_write()
632 } else {
633 Protection::read()
634 };
635 let size = mapping
636 .size
637 .try_into()
638 .context("Invalid size for file-backed mapping")?;
639 let memory_mapping = MemoryMappingBuilder::new(size)
640 .from_file(&file)
641 .offset(mapping.offset)
642 .protection(prot)
643 .build()
644 .context("failed to map backing file for file-backed mapping")?;
645
Daniel Verkampde4d7292022-03-01 15:22:38 -0800646 match resources.mmio_allocator_any().allocate_at(
647 mapping.address,
648 mapping.size,
649 Alloc::FileBacked(mapping.address),
650 "file-backed mapping".to_owned(),
651 ) {
652 // OutOfSpace just means that this mapping is not in the MMIO regions at all, so don't
653 // consider it an error.
654 // TODO(b/222769529): Reserve this region in a global memory address space allocator once
655 // we have that so nothing else can accidentally overlap with it.
656 Ok(()) | Err(resources::Error::OutOfSpace) => {}
657 e => e.context("failed to allocate guest address for file-backed mapping")?,
658 }
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000659
660 vm.add_memory_region(
661 GuestAddress(mapping.address),
662 Box::new(memory_mapping),
663 !mapping.writable,
664 /* log_dirty_pages = */ false,
665 )
666 .context("failed to configure file-backed mapping")?;
667 }
668
669 Ok(())
670}
671
Anton Romanov33334412022-03-22 17:48:18 +0000672#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Xiong Zhangf7874712021-12-24 10:53:59 +0800673fn create_pcie_root_port(
674 host_pcie_rp: Vec<PathBuf>,
675 sys_allocator: &mut SystemAllocator,
676 control_tubes: &mut Vec<TaggedControlTube>,
677 devices: &mut Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
678 hp_vec: &mut Vec<Arc<Mutex<dyn HotPlugBus>>>,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800679 hp_endpoints_ranges: &mut Vec<RangeInclusive<u32>>,
Xiong Zhangf7874712021-12-24 10:53:59 +0800680) -> Result<()> {
681 if host_pcie_rp.is_empty() {
682 // user doesn't specify host pcie root port which link to this virtual pcie rp,
683 // find the empty bus and create a total virtual pcie rp
Haiwei Lie35d4652022-02-10 15:39:33 +0800684 let mut hp_sec_bus = 0u8;
685 // Create Pcie Root Port for non-root buses, each non-root bus device will be
686 // connected behind a virtual pcie root port.
687 for i in 1..255 {
688 if sys_allocator.pci_bus_empty(i) {
689 if hp_sec_bus == 0 {
690 hp_sec_bus = i;
691 }
692 continue;
693 }
694 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(i, false)));
695 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
696 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
697 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
698 // no ipc is used if the root port disables hotplug
699 devices.push((pci_bridge, None));
700 }
701
702 // Create Pcie Root Port for hot-plug
703 if hp_sec_bus == 0 {
704 return Err(anyhow!("no more addresses are available"));
705 }
706 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(hp_sec_bus, true)));
Xiong Zhangf7874712021-12-24 10:53:59 +0800707 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
708 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
709 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
710
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800711 hp_endpoints_ranges.push(RangeInclusive::new(
712 PciAddress {
713 bus: pci_bridge.get_secondary_num(),
714 dev: 0,
715 func: 0,
716 }
717 .to_u32(),
718 PciAddress {
719 bus: pci_bridge.get_subordinate_num(),
720 dev: 32,
721 func: 8,
722 }
723 .to_u32(),
724 ));
725
Xiong Zhangf7874712021-12-24 10:53:59 +0800726 devices.push((pci_bridge, None));
727 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
728 } else {
729 // user specify host pcie root port which link to this virtual pcie rp,
730 // reserve the host pci BDF and create a virtual pcie RP with some attrs same as host
731 for pcie_sysfs in host_pcie_rp.iter() {
Xiong Zhangd6de3192022-02-16 13:24:06 +0800732 let pcie_host = PcieHostRootPort::new(pcie_sysfs.as_path())?;
733 let bus_range = pcie_host.get_bus_range();
734 let mut slot_implemented = true;
735 for i in bus_range.secondary..=bus_range.subordinate {
736 // if this bus is occupied by one vfio-pci device, this vfio-pci device is
737 // connected to a pci bridge on host statically, then it should be connected
738 // to a virtual pci bridge in guest statically, this bridge won't have
739 // hotplug capability and won't use slot.
740 if !sys_allocator.pci_bus_empty(i) {
741 slot_implemented = false;
742 }
743 }
744 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new_from_host(
745 pcie_host,
746 slot_implemented,
747 )?));
Xiong Zhangf7874712021-12-24 10:53:59 +0800748
749 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
750 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
751 let mut pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
752 // early reservation for host pcie root port devices.
753 let rootport_addr = pci_bridge.allocate_address(sys_allocator);
754 if rootport_addr.is_err() {
755 warn!(
756 "address reservation failed for hot pcie root port {}",
757 pci_bridge.debug_label()
758 );
759 }
760
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800761 hp_endpoints_ranges.push(RangeInclusive::new(
762 PciAddress {
763 bus: pci_bridge.get_secondary_num(),
764 dev: 0,
765 func: 0,
766 }
767 .to_u32(),
768 PciAddress {
769 bus: pci_bridge.get_subordinate_num(),
770 dev: 32,
771 func: 8,
772 }
773 .to_u32(),
774 ));
775
Xiong Zhangf7874712021-12-24 10:53:59 +0800776 devices.push((pci_bridge, None));
777 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
778 }
779 }
780
781 Ok(())
782}
783
Zach Reiznera90649a2021-03-31 12:56:08 -0700784fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800785 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100786 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900787 open_file(
788 initrd_path,
789 true, /*read_only*/
790 false, /*O_DIRECT*/
791 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700792 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100793 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800794 } else {
795 None
796 };
797
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700798 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100799 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900800 open_file(
801 kernel_path,
802 true, /*read_only*/
803 false, /*O_DIRECT*/
804 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700805 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100806 ),
807 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900808 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700809 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100810 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700811 _ => panic!("Did not receive a bios or kernel, should be impossible."),
812 };
813
Will Deaconc48e7832021-07-30 19:03:06 +0100814 let swiotlb = if let Some(size) = cfg.swiotlb {
815 Some(
816 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700817 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100818 )
819 } else {
820 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +0000821 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
822 Some(64 * 1024 * 1024)
823 }
Will Deaconc48e7832021-07-30 19:03:06 +0100824 ProtectionType::Unprotected => None,
825 }
826 };
827
Zach Reiznera90649a2021-03-31 12:56:08 -0700828 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -0800829 memory_size: cfg
830 .memory
831 .unwrap_or(256)
832 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700833 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100834 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -0700835 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -0700836 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -0700837 cpu_clusters: cfg.cpu_clusters.clone(),
838 cpu_capacity: cfg.cpu_capacity.clone(),
Dmytro Maluka74031b42022-02-25 18:00:17 +0000839 #[cfg(feature = "direct")]
Dmytro Maluka6cea2c72022-02-25 18:22:17 +0000840 direct_gpe: cfg.direct_gpe.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +0900841 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +0900842 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700843 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800844 android_fstab: cfg
845 .android_fstab
846 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700847 .map(|x| {
848 File::open(x)
849 .with_context(|| format!("failed to open android fstab file {}", x.display()))
850 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800851 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +0900852 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800853 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -0700854 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700855 acpi_sdts: cfg
856 .acpi_tables
857 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700858 .map(|path| {
859 SDT::from_file(path)
860 .with_context(|| format!("failed to open ACPI file {}", path.display()))
861 })
Tomasz Jeznach42644642020-05-20 23:27:59 -0700862 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +0900863 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +0900864 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100865 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900866 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -0700867 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700868 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -0700869 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +0800870 host_cpu_topology: cfg.host_cpu_topology,
Grzegorz Jaszczykd33874e2022-02-11 18:27:29 +0000871 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
872 force_s2idle: cfg.force_s2idle,
Zach Reiznera90649a2021-03-31 12:56:08 -0700873 })
874}
875
/// Terminal state reported by `run_config` once it returns successfully.
// NOTE(review): the per-variant descriptions below are inferred from the variant names only;
// confirm against the code that produces them.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExitState {
    /// Presumably: the VM asked to be reset.
    Reset,
    /// Presumably: the VM stopped normally.
    Stop,
    /// Presumably: the VM crashed.
    Crash,
    /// Presumably: the guest reported a panic.
    GuestPanic,
}
883
Daniel Verkamp5586ff52022-02-24 16:34:55 -0800884// Remove ranges in `guest_mem_layout` that overlap with ranges in `file_backed_mappings`.
885// Returns the updated guest memory layout.
886fn punch_holes_in_guest_mem_layout_for_mappings(
887 guest_mem_layout: Vec<(GuestAddress, u64)>,
888 file_backed_mappings: &[FileBackedMappingParameters],
889) -> Vec<(GuestAddress, u64)> {
890 // Create a set containing (start, end) pairs with exclusive end (end = start + size; the byte
891 // at end is not included in the range).
892 let mut layout_set = BTreeSet::new();
893 for (addr, size) in &guest_mem_layout {
894 layout_set.insert((addr.offset(), addr.offset() + size));
895 }
896
897 for mapping in file_backed_mappings {
898 let mapping_start = mapping.address;
899 let mapping_end = mapping_start + mapping.size;
900
901 // Repeatedly split overlapping guest memory regions until no overlaps remain.
902 while let Some((range_start, range_end)) = layout_set
903 .iter()
904 .find(|&&(range_start, range_end)| {
905 mapping_start < range_end && mapping_end > range_start
906 })
907 .cloned()
908 {
909 layout_set.remove(&(range_start, range_end));
910
911 if range_start < mapping_start {
912 layout_set.insert((range_start, mapping_start));
913 }
914 if range_end > mapping_end {
915 layout_set.insert((mapping_end, range_end));
916 }
917 }
918 }
919
920 // Build the final guest memory layout from the modified layout_set.
921 layout_set
922 .iter()
923 .map(|(start, end)| (GuestAddress(*start), end - start))
924 .collect()
925}
926
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800927pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -0700928 let components = setup_vm_components(&cfg)?;
929
930 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700931 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
Daniel Verkamp5586ff52022-02-24 16:34:55 -0800932
933 let guest_mem_layout =
934 punch_holes_in_guest_mem_layout_for_mappings(guest_mem_layout, &cfg.file_backed_mappings);
935
Daniel Verkamp6b298582021-08-16 15:37:11 -0700936 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700937 let mut mem_policy = MemoryPolicy::empty();
938 if components.hugepages {
939 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
940 }
Quentin Perret26203802021-12-02 09:48:43 +0000941 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700942 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +0000943 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Andrew Walbrane79aba12022-01-27 14:12:35 +0000944 // Check that the VM was actually created in protected mode as expected.
945 if cfg.protected_vm != ProtectionType::Unprotected && !vm.check_capability(VmCap::Protected) {
946 bail!("Failed to create protected VM");
947 }
Daniel Verkamp6b298582021-08-16 15:37:11 -0700948 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700949
950 enum KvmIrqChip {
951 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
952 Split(KvmSplitIrqChip),
953 Kernel(KvmKernelIrqChip),
954 }
955
956 impl KvmIrqChip {
957 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
958 match self {
959 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
960 KvmIrqChip::Split(i) => i,
961 KvmIrqChip::Kernel(i) => i,
962 }
963 }
964 }
965
966 let ioapic_host_tube;
967 let mut irq_chip = if cfg.split_irqchip {
968 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
969 unimplemented!("KVM split irqchip mode only supported on x86 processors");
970 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
971 {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700972 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700973 ioapic_host_tube = Some(host_tube);
974 KvmIrqChip::Split(
975 KvmSplitIrqChip::new(
976 vm_clone,
977 components.vcpu_count,
978 ioapic_device_tube,
979 Some(120),
980 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700981 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700982 )
983 }
984 } else {
985 ioapic_host_tube = None;
986 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700987 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
988 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700989 )
990 };
991
992 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
993}
994
995fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -0700996 cfg: Config,
997 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -0700998 mut vm: V,
999 irq_chip: &mut dyn IrqChipArch,
1000 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001001) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -07001002where
1003 Vcpu: VcpuArch + 'static,
1004 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07001005{
1006 if cfg.sandbox {
1007 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1008 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1009 // access to those files will not be possible.
1010 info!("crosvm entering multiprocess mode");
1011 }
1012
Daniel Verkampf1439d42021-05-21 13:55:10 -07001013 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07001014 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -07001015 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07001016
Zach Reiznera90649a2021-03-31 12:56:08 -07001017 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1018 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1019 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001020 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -07001021
Zach Reiznera60744b2019-02-13 17:33:32 -08001022 let control_server_socket = match &cfg.socket_path {
1023 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -07001024 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -08001025 )),
1026 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001027 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001028
Zach Reiznera90649a2021-03-31 12:56:08 -07001029 let mut control_tubes = Vec::new();
1030
1031 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1032 if let Some(port) = cfg.gdb {
1033 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001034 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -07001035 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
1036 components.gdb = Some((port, gdb_control_tube));
1037 }
1038
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001039 for wl_cfg in &cfg.vhost_user_wl {
1040 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
1041 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001042 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001043 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
1044 }
1045
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001046 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
1047 for _ in 0..cfg.vhost_user_gpu.len() {
Alexandre Courbote55b7912022-03-04 16:54:38 +09001048 let (host_control_tube, device_control_tube) =
1049 Tube::pair().context("failed to create tube")?;
1050 let (host_gpu_tube, device_gpu_tube) = Tube::pair().context("failed to create tube")?;
1051 vhost_user_gpu_tubes.push((host_gpu_tube, device_gpu_tube, device_control_tube));
1052 control_tubes.push(TaggedControlTube::VmMemory(host_control_tube));
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001053 }
1054
Daniel Verkamp6b298582021-08-16 15:37:11 -07001055 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001056 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Andrew Walbran3cd93602022-01-25 13:59:23 +00001057
1058 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
David Stevens8be9ef02022-01-13 22:50:24 +09001059 if let Some(ref path) = cfg.balloon_control {
1060 (
1061 None,
1062 Some(Tube::new(
1063 UnixSeqpacket::connect(path).context("failed to create balloon control")?,
1064 )),
1065 )
1066 } else {
1067 // Balloon gets a special socket so balloon requests can be forwarded
1068 // from the main process.
1069 let (host, device) = Tube::pair().context("failed to create tube")?;
1070 // Set recv timeout to avoid deadlock on sending BalloonControlCommand
1071 // before the guest is ready.
1072 host.set_recv_timeout(Some(Duration::from_millis(100)))
1073 .context("failed to set timeout")?;
1074 (Some(host), Some(device))
1075 }
Andrew Walbran3cd93602022-01-25 13:59:23 +00001076 } else {
1077 (None, None)
1078 };
Dylan Reid059a1882018-07-23 17:58:09 -07001079
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001080 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001081 let mut disk_device_tubes = Vec::new();
1082 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001083 let disk_count = cfg.disks.len();
1084 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001085 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001086 disk_host_tubes.push(disk_host_tub);
1087 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001088 }
1089
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001090 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08001091 let pmem_count = cfg.pmem_devices.len();
1092 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001093 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001094 pmem_device_tubes.push(pmem_device_tube);
1095 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08001096 }
1097
Daniel Verkamp6b298582021-08-16 15:37:11 -07001098 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001099 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001100
Zach Reiznerdc748482021-04-14 13:59:30 -07001101 if let Some(ioapic_host_tube) = ioapic_host_tube {
1102 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
1103 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08001104
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001105 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07001106 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09001107 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07001108 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09001109 Some(mut jail) => {
1110 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
1111 #[cfg(feature = "power-monitor-powerd")]
1112 {
Fergus Dall51200512021-08-19 12:54:26 +10001113 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09001114
1115 // Create a tmpfs in the device's root directory so that we can bind mount files.
1116 jail.mount_with_data(
1117 Path::new("none"),
1118 Path::new("/"),
1119 "tmpfs",
1120 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1121 "size=67108864",
1122 )?;
1123
1124 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
1125 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
1126 }
1127 Some(jail)
1128 }
1129 None => None,
1130 };
1131 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001132 } else {
1133 (&cfg.battery_type, None)
1134 };
1135
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001136 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
1137
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001138 let fs_count = cfg
1139 .shared_dirs
1140 .iter()
1141 .filter(|sd| sd.kind == SharedDirKind::FS)
1142 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001143 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001144 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001145 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001146 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
1147 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001148 }
1149
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001150 let mut vvu_proxy_device_tubes = Vec::new();
1151 for _ in 0..cfg.vvu_proxy.len() {
1152 let (vvu_proxy_host_tube, vvu_proxy_device_tube) =
1153 Tube::pair().context("failed to create VVU proxy tube")?;
1154 control_tubes.push(TaggedControlTube::VmMemory(vvu_proxy_host_tube));
1155 vvu_proxy_device_tubes.push(vvu_proxy_device_tube);
1156 }
1157
Daniel Verkamp6b298582021-08-16 15:37:11 -07001158 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001159 let reset_evt = Event::new().context("failed to create event")?;
Andrew Walbran1a19c672022-01-24 17:24:10 +00001160 let crash_evt = Event::new().context("failed to create event")?;
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001161 let (panic_rdtube, panic_wrtube) = Tube::pair().context("failed to create tube")?;
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001162
David Stevense4db4172022-03-10 13:26:04 +09001163 let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
David Stevensdbd24182022-03-10 10:53:56 +09001164 let mut sys_allocator = SystemAllocator::new(
1165 Arch::get_system_allocator_config(&vm),
1166 pstore_size,
1167 &cfg.mmio_address_ranges,
1168 )
1169 .context("failed to create system allocator")?;
David Stevense4db4172022-03-10 13:26:04 +09001170
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001171 let ramoops_region = match &components.pstore {
1172 Some(pstore) => Some(
David Stevense4db4172022-03-10 13:26:04 +09001173 arch::pstore::create_memory_region(
1174 &mut vm,
1175 sys_allocator.reserved_region().unwrap(),
1176 pstore,
1177 )
1178 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001179 ),
1180 None => None,
1181 };
1182
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00001183 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
1184
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001185 #[cfg(feature = "gpu")]
1186 // Hold on to the render server jail so it keeps running until we exit run_vm()
Dmitry Torokhove464a7a2022-01-26 13:29:36 -08001187 let (_render_server_jail, render_server_fd) =
1188 if let Some(parameters) = &cfg.gpu_render_server_parameters {
1189 let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
1190 (Some(ScopedMinijail(jail)), Some(fd))
1191 } else {
1192 (None, None)
1193 };
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001194
David Stevens06d157a2022-01-13 23:44:48 +09001195 let init_balloon_size = components
1196 .memory_size
1197 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1198 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1199 }))
1200 .context("failed to calculate init balloon size")?;
1201
Tomasz Nowicki64f43552022-02-22 14:14:45 +00001202 #[cfg(feature = "direct")]
1203 let mut irqs = Vec::new();
1204
1205 #[cfg(feature = "direct")]
1206 for irq in &cfg.direct_level_irq {
1207 if !sys_allocator.reserve_irq(*irq) {
1208 warn!("irq {} already reserved.", irq);
1209 }
1210 let trigger = Event::new().context("failed to create event")?;
1211 let resample = Event::new().context("failed to create event")?;
1212 irq_chip
1213 .register_irq_event(*irq, &trigger, Some(&resample))
1214 .unwrap();
1215 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
1216 .context("failed to enable interrupt forwarding")?;
1217 direct_irq
1218 .irq_enable(*irq)
1219 .context("failed to enable interrupt forwarding")?;
1220 irqs.push(direct_irq);
1221 }
1222
1223 #[cfg(feature = "direct")]
1224 for irq in &cfg.direct_edge_irq {
1225 if !sys_allocator.reserve_irq(*irq) {
1226 warn!("irq {} already reserved.", irq);
1227 }
1228 let trigger = Event::new().context("failed to create event")?;
1229 irq_chip.register_irq_event(*irq, &trigger, None).unwrap();
1230 let direct_irq = devices::DirectIrq::new(trigger, None)
1231 .context("failed to enable interrupt forwarding")?;
1232 direct_irq
1233 .irq_enable(*irq)
1234 .context("failed to enable interrupt forwarding")?;
1235 irqs.push(direct_irq);
1236 }
1237
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001238 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> =
1239 BTreeMap::new();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001240 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07001241 &cfg,
1242 &mut vm,
1243 &mut sys_allocator,
1244 &exit_evt,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001245 panic_wrtube,
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001246 &mut iommu_attached_endpoints,
Zach Reiznerdc748482021-04-14 13:59:30 -07001247 &mut control_tubes,
1248 wayland_device_tube,
1249 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001250 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001251 balloon_device_tube,
David Stevens06d157a2022-01-13 23:44:48 +09001252 init_balloon_size,
Zach Reiznerdc748482021-04-14 13:59:30 -07001253 &mut disk_device_tubes,
1254 &mut pmem_device_tubes,
1255 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001256 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07001257 usb_provider,
1258 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001259 #[cfg(feature = "gpu")]
1260 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001261 &mut vvu_proxy_device_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001262 )?;
1263
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001264 let mut hp_endpoints_ranges: Vec<RangeInclusive<u32>> = Vec::new();
1265
Anton Romanov33334412022-03-22 17:48:18 +00001266 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Xiong Zhangf7874712021-12-24 10:53:59 +08001267 let mut hotplug_buses: Vec<Arc<Mutex<dyn HotPlugBus>>> = Vec::new();
Anton Romanov33334412022-03-22 17:48:18 +00001268
Xiong Zhangf7874712021-12-24 10:53:59 +08001269 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1270 {
1271 #[cfg(feature = "direct")]
1272 let rp_host = cfg.pcie_rp.clone();
1273 #[cfg(not(feature = "direct"))]
1274 let rp_host: Vec<PathBuf> = Vec::new();
1275
1276 // Create Pcie Root Port
1277 create_pcie_root_port(
1278 rp_host,
1279 &mut sys_allocator,
1280 &mut control_tubes,
1281 &mut devices,
1282 &mut hotplug_buses,
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001283 &mut hp_endpoints_ranges,
Xiong Zhangf7874712021-12-24 10:53:59 +08001284 )?;
1285 }
1286
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001287 let (translate_response_senders, request_rx) = setup_virtio_access_platform(
1288 &mut sys_allocator,
1289 &mut iommu_attached_endpoints,
1290 &mut devices,
1291 )?;
1292
Haiwei Li87bc2fc2022-02-18 14:37:40 +08001293 let iommu_host_tube = if !iommu_attached_endpoints.is_empty() || cfg.virtio_iommu {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001294 let (iommu_host_tube, iommu_device_tube) = Tube::pair().context("failed to create tube")?;
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001295 let iommu_dev = create_iommu_device(
1296 &cfg,
1297 (1u64 << vm.get_guest_phys_addr_bits()) - 1,
1298 iommu_attached_endpoints,
1299 hp_endpoints_ranges,
1300 translate_response_senders,
1301 request_rx,
1302 iommu_device_tube,
1303 )?;
1304
1305 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1306 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1307 let mut dev = VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
1308 .context("failed to create virtio pci dev")?;
1309 // early reservation for viommu.
1310 dev.allocate_address(&mut sys_allocator)
1311 .context("failed to allocate resources early for virtio pci dev")?;
1312 let dev = Box::new(dev);
1313 devices.push((dev, iommu_dev.jail));
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001314 Some(iommu_host_tube)
1315 } else {
1316 None
1317 };
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001318
Peter Fangc2bba082021-04-19 18:40:24 -07001319 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001320 for device in devices
1321 .iter_mut()
1322 .filter_map(|(dev, _)| dev.as_pci_device_mut())
1323 {
Peter Fangc2bba082021-04-19 18:40:24 -07001324 let sdts = device
1325 .generate_acpi(components.acpi_sdts)
1326 .or_else(|| {
1327 error!("ACPI table generation error");
1328 None
1329 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001330 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07001331 components.acpi_sdts = sdts;
1332 }
1333
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001334 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
1335 let mut kvm_vcpu_ids = Vec::new();
1336
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08001337 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07001338 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06001339 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07001340 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001341 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07001342 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06001343 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001344 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001345 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07001346 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001347 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001348 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07001349 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001350 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06001351 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001352 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08001353
Daniel Verkamp1286b482021-11-30 15:14:16 -08001354 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1355 {
Xiong Zhangf7874712021-12-24 10:53:59 +08001356 for hotplug_bus in hotplug_buses.iter() {
1357 linux.hotplug_bus.push(hotplug_bus.clone());
1358 }
Daniel Verkamp1286b482021-11-30 15:14:16 -08001359 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001360
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001361 #[cfg(feature = "direct")]
1362 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001363 let direct_io = Arc::new(
1364 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
1365 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001366 for range in pmio.ranges.iter() {
1367 linux
1368 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001369 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001370 .unwrap();
1371 }
1372 };
1373
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001374 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001375 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08001376 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001377 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08001378 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001379 );
Xiong Zhang46471a02021-11-12 00:34:42 +08001380
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001381 for range in mmio.ranges.iter() {
1382 linux
1383 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001384 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001385 .unwrap();
1386 }
1387 };
1388
Daniel Verkamp6b298582021-08-16 15:37:11 -07001389 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001390 run_control(
1391 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07001392 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001393 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08001394 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001395 control_tubes,
1396 balloon_host_tube,
1397 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001398 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001399 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001400 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001401 reset_evt,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001402 crash_evt,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001403 panic_rdtube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001404 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001405 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08001406 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001407 kvm_vcpu_ids,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001408 iommu_host_tube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001409 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001410}
1411
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001412fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
1413 linux: &RunnableLinuxVm<V, Vcpu>,
1414 host_addr: PciAddress,
1415) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
1416 for hp_bus in linux.hotplug_bus.iter() {
1417 if let Some(number) = hp_bus.lock().is_match(host_addr) {
1418 return Ok((hp_bus.clone(), number));
1419 }
1420 }
1421 Err(anyhow!("Failed to find a suitable hotplug bus"))
1422}
1423
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001424fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1425 linux: &mut RunnableLinuxVm<V, Vcpu>,
1426 sys_allocator: &mut SystemAllocator,
1427 cfg: &Config,
1428 control_tubes: &mut Vec<TaggedControlTube>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001429 iommu_host_tube: &Option<Tube>,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001430 vfio_path: &Path,
1431) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001432 let host_os_str = vfio_path
1433 .file_name()
1434 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1435 let host_str = host_os_str
1436 .to_str()
1437 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001438 let host_addr =
1439 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001440
1441 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
1442
Woody Chow055b81b2022-01-25 18:34:29 +09001443 let mut endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> = BTreeMap::new();
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001444 let (vfio_pci_device, jail) = create_vfio_device(
1445 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001446 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001447 sys_allocator,
1448 control_tubes,
1449 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001450 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001451 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001452 None,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001453 if iommu_host_tube.is_some() {
1454 IommuDevType::VirtioIommu
1455 } else {
1456 IommuDevType::NoIommu
1457 },
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001458 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001459
1460 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001461 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001462
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001463 if let Some(iommu_host_tube) = iommu_host_tube {
1464 let &endpoint_addr = endpoints.iter().next().unwrap().0;
1465 let mapper = endpoints.remove(&endpoint_addr).unwrap();
1466 if let Some(vfio_wrapper) = mapper.lock().as_vfio_wrapper() {
1467 let vfio_container = vfio_wrapper.as_vfio_container();
1468 let descriptor = vfio_container.lock().into_raw_descriptor()?;
1469 let request = VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceAdd {
1470 endpoint_addr,
1471 container: {
1472 // Safe because the descriptor is uniquely owned by `descriptor`.
1473 unsafe { File::from_raw_descriptor(descriptor) }
1474 },
1475 });
1476
1477 match virtio_iommu_request(iommu_host_tube, &request)
1478 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1479 {
1480 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1481 resp => bail!("Unexpected message response: {:?}", resp),
1482 }
1483 };
1484 }
1485
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001486 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001487 let mut hp_bus = hp_bus.lock();
1488 hp_bus.add_hotplug_device(host_key, pci_address);
1489 hp_bus.hot_plug(pci_address);
1490 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001491}
1492
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001493fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1494 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08001495 sys_allocator: &mut SystemAllocator,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001496 iommu_host_tube: &Option<Tube>,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001497 vfio_path: &Path,
1498) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001499 let host_os_str = vfio_path
1500 .file_name()
1501 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1502 let host_str = host_os_str
1503 .to_str()
1504 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001505 let host_addr =
1506 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001507 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001508 for hp_bus in linux.hotplug_bus.iter() {
1509 let mut hp_bus_lock = hp_bus.lock();
1510 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001511 if let Some(iommu_host_tube) = iommu_host_tube {
1512 let request =
1513 VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceDel {
1514 endpoint_addr: pci_addr.to_u32(),
1515 });
1516 match virtio_iommu_request(iommu_host_tube, &request)
1517 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1518 {
1519 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1520 resp => bail!("Unexpected message response: {:?}", resp),
1521 }
1522 }
1523
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001524 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08001525 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001526 return Ok(());
1527 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001528 }
1529
Daniel Verkamp6b298582021-08-16 15:37:11 -07001530 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001531}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001532
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001533fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
1534 linux: &mut RunnableLinuxVm<V, Vcpu>,
1535 sys_allocator: &mut SystemAllocator,
1536 cfg: &Config,
1537 add_tubes: &mut Vec<TaggedControlTube>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001538 iommu_host_tube: &Option<Tube>,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001539 vfio_path: &Path,
1540 add: bool,
1541) -> VmResponse {
1542 let ret = if add {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001543 add_vfio_device(
1544 linux,
1545 sys_allocator,
1546 cfg,
1547 add_tubes,
1548 iommu_host_tube,
1549 vfio_path,
1550 )
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001551 } else {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001552 remove_vfio_device(linux, sys_allocator, iommu_host_tube, vfio_path)
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001553 };
1554
1555 match ret {
1556 Ok(()) => VmResponse::Ok,
1557 Err(e) => {
1558 error!("hanlde_vfio_command failure: {}", e);
1559 add_tubes.clear();
1560 VmResponse::Err(base::Error::new(libc::EINVAL))
1561 }
1562 }
1563}
1564
Zach Reiznerdc748482021-04-14 13:59:30 -07001565fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
1566 mut linux: RunnableLinuxVm<V, Vcpu>,
1567 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001568 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08001569 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001570 mut control_tubes: Vec<TaggedControlTube>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001571 balloon_host_tube: Option<Tube>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001572 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07001573 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001574 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001575 reset_evt: Event,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001576 crash_evt: Event,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001577 panic_rdtube: Tube,
Zach Reizner55a9e502018-10-03 10:22:32 -07001578 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001579 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08001580 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001581 kvm_vcpu_ids: Vec<usize>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001582 iommu_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001583) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001584 #[derive(PollToken)]
1585 enum Token {
1586 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001587 Reset,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001588 Crash,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001589 Panic,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001590 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001591 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001592 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08001593 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001594 VmControl { index: usize },
Tomasz Nowicki98801002022-02-23 21:00:00 +00001595 DelayedIrqFd,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001596 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001597
Zach Reizner19ad1f32019-12-12 18:58:50 -08001598 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001599 .set_raw_mode()
1600 .expect("failed to set terminal raw mode");
1601
Michael Hoylee392c462020-10-07 03:29:24 -07001602 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07001603 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001604 (&reset_evt, Token::Reset),
Andrew Walbran1a19c672022-01-24 17:24:10 +00001605 (&crash_evt, Token::Crash),
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001606 (&panic_rdtube, Token::Panic),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001607 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07001608 (&sigchld_fd, Token::ChildSignal),
1609 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07001610 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07001611
Zach Reiznera60744b2019-02-13 17:33:32 -08001612 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07001613 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001614 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001615 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001616 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001617 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07001618 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07001619 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001620 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001621 }
1622
Steven Richmanf32d0b42020-06-20 21:45:32 -07001623 let events = linux
1624 .irq_chip
1625 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001626 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001627
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001628 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07001629 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001630 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001631 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001632 }
1633
Tomasz Nowicki98801002022-02-23 21:00:00 +00001634 if let Some(delayed_ioapic_irq_trigger) = linux.irq_chip.irq_delayed_event_token()? {
1635 wait_ctx
1636 .add(&delayed_ioapic_irq_trigger, Token::DelayedIrqFd)
1637 .context("failed to add descriptor to wait context")?;
1638 }
1639
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001640 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07001641 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001642 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07001643 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001644
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001645 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1646 // Create a channel for GDB thread.
1647 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
1648 let (s, r) = mpsc::channel();
1649 (Some(s), Some(r))
1650 } else {
1651 (None, None)
1652 };
1653
Steven Richmanf32d0b42020-06-20 21:45:32 -07001654 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
1655 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07001656 let use_hypervisor_signals = !linux
1657 .vm
1658 .get_hypervisor()
Andrew Walbran985491a2022-01-27 13:47:40 +00001659 .check_capability(HypervisorCap::ImmediateExit);
Anton Romanov5acc0f52022-01-28 00:18:11 +00001660 vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001661
Zach Reizner304e7312020-09-29 16:00:24 -07001662 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00001663 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001664 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
1665 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07001666 // Enable core scheduling before creating vCPUs so that the cookie will be
1667 // shared by all vCPU threads.
1668 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
1669 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001670 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07001671 if let Err(e) = enable_core_scheduling() {
1672 error!("Failed to enable core scheduling: {}", e);
1673 }
1674 }
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001675 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
1676 None => None,
1677 Some(cgroup_path) => {
1678 // Move main process to cgroup_path
1679 let mut f = File::create(&cgroup_path.join("tasks"))?;
1680 f.write_all(process::id().to_string().as_bytes())?;
1681 Some(f)
1682 }
1683 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07001684 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07001685 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07001686 let vcpu_affinity = match linux.vcpu_affinity.clone() {
1687 Some(VcpuAffinity::Global(v)) => v,
1688 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
1689 None => Default::default(),
1690 };
Anton Romanov5acc0f52022-01-28 00:18:11 +00001691 let handle = vcpu::run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001692 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001693 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07001694 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001695 linux.vm.try_clone().context("failed to clone vm")?,
1696 linux
1697 .irq_chip
1698 .try_box_clone()
1699 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001700 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001701 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07001702 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001703 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001704 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07001705 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001706 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07001707 (*linux.io_bus).clone(),
1708 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07001709 exit_evt.try_clone().context("failed to clone event")?,
Andrew Walbranb28ae8e2022-01-17 14:33:10 +00001710 reset_evt.try_clone().context("failed to clone event")?,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001711 crash_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001712 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07001713 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001714 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001715 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1716 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001717 cfg.per_vm_core_scheduling,
1718 cfg.host_cpu_topology,
Zide Chen344e2432022-01-28 14:58:53 -08001719 cfg.privileged_vm,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001720 match vcpu_cgroup_tasks_file {
1721 None => None,
1722 Some(ref f) => Some(
1723 f.try_clone()
1724 .context("failed to clone vcpu cgroup tasks file")?,
1725 ),
1726 },
Zach Reizner55a9e502018-10-03 10:22:32 -07001727 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07001728 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07001729 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001730
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001731 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1732 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001733 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001734 let to_vcpu_channels = vcpu_handles
1735 .iter()
1736 .map(|(_handle, channel)| channel.clone())
1737 .collect();
1738 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001739 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001740 to_vcpu_channels,
1741 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
1742 );
1743 thread::Builder::new()
1744 .name("gdb".to_owned())
1745 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001746 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001747 };
1748
Dylan Reid059a1882018-07-23 17:58:09 -07001749 vcpu_thread_barrier.wait();
1750
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001751 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09001752 let mut balloon_stats_id: u64 = 0;
1753
Michael Hoylee392c462020-10-07 03:29:24 -07001754 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001755 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07001756 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001757 Ok(v) => v,
1758 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001759 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001760 break;
1761 }
1762 }
1763 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001764
1765 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07001766 for event in events.iter().filter(|e| e.is_readable) {
1767 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001768 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001769 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07001770 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001771 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001772 Token::Reset => {
1773 info!("vcpu requested reset");
1774 exit_state = ExitState::Reset;
1775 break 'wait;
1776 }
Andrew Walbran1a19c672022-01-24 17:24:10 +00001777 Token::Crash => {
1778 info!("vcpu crashed");
1779 exit_state = ExitState::Crash;
1780 break 'wait;
1781 }
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001782 Token::Panic => {
1783 let mut break_to_wait: bool = true;
1784 match panic_rdtube.recv::<u8>() {
1785 Ok(panic_code) => {
1786 let panic_code = PvPanicCode::from_u8(panic_code);
1787 info!("Guest reported panic [Code: {}]", panic_code);
1788 if panic_code == PvPanicCode::CrashLoaded {
1789 // VM is booting to crash kernel.
1790 break_to_wait = false;
1791 }
1792 }
1793 Err(e) => {
1794 warn!("failed to recv panic event: {} ", e);
1795 }
1796 }
1797 if break_to_wait {
1798 exit_state = ExitState::GuestPanic;
1799 break 'wait;
1800 }
1801 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001802 Token::Suspend => {
1803 info!("VM requested suspend");
1804 linux.suspend_evt.read().unwrap();
Anton Romanov5acc0f52022-01-28 00:18:11 +00001805 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001806 &vcpu_handles,
1807 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001808 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07001809 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001810 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001811 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001812 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001813 while let Some(siginfo) =
1814 sigchld_fd.read().context("failed to create signalfd")?
1815 {
Zach Reizner3ba00982019-01-23 19:04:43 -08001816 let pid = siginfo.ssi_pid;
1817 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1818 Some(label) => format!("{} (pid {})", label, pid),
1819 None => format!("pid {}", pid),
1820 };
David Tolnayf5032762018-12-03 10:46:45 -08001821 error!(
1822 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001823 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001824 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001825 }
Michael Hoylee392c462020-10-07 03:29:24 -07001826 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001827 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001828 Token::IrqFd { index } => {
1829 if let Err(e) = linux.irq_chip.service_irq_event(index) {
1830 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001831 }
1832 }
Tomasz Nowicki98801002022-02-23 21:00:00 +00001833 Token::DelayedIrqFd => {
1834 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
1835 warn!("can't deliver delayed irqs: {}", e);
1836 }
1837 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001838 Token::VmControlServer => {
1839 if let Some(socket_server) = &control_server_socket {
1840 match socket_server.accept() {
1841 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07001842 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001843 .add(
1844 &socket,
1845 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001846 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08001847 },
1848 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001849 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001850 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001851 }
1852 Err(e) => error!("failed to accept socket: {}", e),
1853 }
1854 }
1855 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001856 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001857 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001858 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001859 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001860 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001861 Ok(request) => {
1862 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001863 let response = match request {
1864 VmRequest::VfioCommand { vfio_path, add } => {
1865 handle_vfio_command(
1866 &mut linux,
1867 &mut sys_allocator,
1868 &cfg,
1869 &mut add_tubes,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001870 &iommu_host_tube,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001871 &vfio_path,
1872 add,
1873 )
1874 }
1875 _ => request.execute(
1876 &mut run_mode_opt,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001877 balloon_host_tube.as_ref(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001878 &mut balloon_stats_id,
1879 disk_host_tubes,
Peter Fang6ca03232021-12-20 02:17:21 -08001880 &mut linux.pm,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001881 #[cfg(feature = "usb")]
1882 Some(&usb_control_tube),
1883 #[cfg(not(feature = "usb"))]
1884 None,
1885 &mut linux.bat_control,
1886 &vcpu_handles,
1887 ),
1888 };
1889
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001890 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001891 error!("failed to send VmResponse: {}", e);
1892 }
1893 if let Some(run_mode) = run_mode_opt {
1894 info!("control socket changed run mode to {}", run_mode);
1895 match run_mode {
1896 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07001897 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001898 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001899 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08001900 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07001901 for dev in &linux.resume_notify_devices {
1902 dev.lock().resume_imminent();
1903 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001904 }
Anton Romanov5acc0f52022-01-28 00:18:11 +00001905 vcpu::kick_all_vcpus(
Steven Richman11dc6712020-09-02 15:39:14 -07001906 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07001907 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001908 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07001909 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001910 }
1911 }
1912 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001913 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001914 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001915 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001916 vm_control_indices_to_remove.push(index);
1917 } else {
1918 error!("failed to recv VmRequest: {}", e);
1919 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001920 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001921 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001922 TaggedControlTube::VmMemory(tube) => {
1923 match tube.recv::<VmMemoryRequest>() {
1924 Ok(request) => {
1925 let response = request.execute(
1926 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07001927 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001928 Arc::clone(&map_request),
1929 &mut gralloc,
1930 );
1931 if let Err(e) = tube.send(&response) {
1932 error!("failed to send VmMemoryControlResponse: {}", e);
1933 }
1934 }
1935 Err(e) => {
1936 if let TubeError::Disconnected = e {
1937 vm_control_indices_to_remove.push(index);
1938 } else {
1939 error!("failed to recv VmMemoryControlRequest: {}", e);
1940 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001941 }
1942 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001943 }
1944 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08001945 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001946 let response = {
1947 let irq_chip = &mut linux.irq_chip;
1948 request.execute(
1949 |setup| match setup {
1950 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001951 if let Some(event_index) = irq_chip
1952 .register_irq_event(irq, ev, None)?
1953 {
1954 match wait_ctx.add(
1955 ev,
1956 Token::IrqFd {
1957 index: event_index
1958 },
1959 ) {
1960 Err(e) => {
1961 warn!("failed to add IrqFd to poll context: {}", e);
1962 Err(e)
1963 },
1964 Ok(_) => {
1965 Ok(())
1966 }
1967 }
1968 } else {
1969 Ok(())
1970 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001971 }
1972 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08001973 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001974 },
Zach Reiznerdc748482021-04-14 13:59:30 -07001975 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001976 )
1977 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001978 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08001979 error!("failed to send VmIrqResponse: {}", e);
1980 }
1981 }
1982 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001983 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08001984 vm_control_indices_to_remove.push(index);
1985 } else {
1986 error!("failed to recv VmIrqRequest: {}", e);
1987 }
1988 }
1989 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001990 TaggedControlTube::VmMsync(tube) => {
1991 match tube.recv::<VmMsyncRequest>() {
1992 Ok(request) => {
1993 let response = request.execute(&mut linux.vm);
1994 if let Err(e) = tube.send(&response) {
1995 error!("failed to send VmMsyncResponse: {}", e);
1996 }
1997 }
1998 Err(e) => {
1999 if let TubeError::Disconnected = e {
2000 vm_control_indices_to_remove.push(index);
2001 } else {
2002 error!("failed to recv VmMsyncRequest: {}", e);
2003 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08002004 }
2005 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002006 }
2007 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002008 Ok(request) => {
2009 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07002010 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002011 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002012 error!("failed to send VmResponse: {}", e);
2013 }
2014 }
2015 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002016 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002017 vm_control_indices_to_remove.push(index);
2018 } else {
2019 error!("failed to recv VmResponse: {}", e);
2020 }
2021 }
2022 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002023 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002024 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08002025 if !add_tubes.is_empty() {
2026 for (idx, socket) in add_tubes.iter().enumerate() {
2027 wait_ctx
2028 .add(
2029 socket.as_ref(),
2030 Token::VmControl {
2031 index: idx + control_tubes.len(),
2032 },
2033 )
2034 .context(
2035 "failed to add hotplug vfio-pci descriptor ot wait context",
2036 )?;
2037 }
2038 control_tubes.append(&mut add_tubes);
2039 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002040 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002041 }
2042 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002043
Vikram Auradkarede68c72021-07-01 14:33:54 -07002044 // It's possible more data is readable and buffered while the socket is hungup,
2045 // so don't delete the tube from the poll context until we're sure all the
2046 // data is read.
2047 // Below case covers a condition where we have received a hungup event and the tube is not
2048 // readable.
2049 // In case of readable tube, once all data is read, any attempt to read more data on hungup
2050 // tube should fail. On such failure, we get Disconnected error and index gets added to
2051 // vm_control_indices_to_remove by the time we reach here.
2052 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
2053 if let Token::VmControl { index } = event.token {
2054 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002055 }
2056 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002057
2058 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002059 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002060 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002061 vm_control_indices_to_remove.dedup();
2062 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07002063 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2064 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002065 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07002066 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08002067 // that has already been closed. Because the token associated with that spurious event
2068 // now belongs to a different socket, the control loop will start to interact with
2069 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2070 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002071 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002072 wait_ctx
2073 .delete(socket)
2074 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08002075 }
2076
2077 // This line implicitly drops the socket at `index` when it gets returned by
2078 // `swap_remove`. After this line, the socket at `index` is not the one from
2079 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07002080 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002081 control_tubes.swap_remove(index);
2082 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002083 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002084 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002085 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002086 }
2087 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002088 }
2089
Anton Romanov5acc0f52022-01-28 00:18:11 +00002090 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07002091 &vcpu_handles,
2092 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002093 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07002094 );
Steven Richman11dc6712020-09-02 15:39:14 -07002095 for (handle, _) in vcpu_handles {
2096 if let Err(e) = handle.join() {
2097 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002098 }
2099 }
2100
Daniel Verkamp94c35272019-09-12 13:31:30 -07002101 // Explicitly drop the VM structure here to allow the devices to clean up before the
2102 // control sockets are closed when this function exits.
2103 mem::drop(linux);
2104
Zach Reizner19ad1f32019-12-12 18:58:50 -08002105 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002106 .set_canon_mode()
2107 .expect("failed to restore canonical mode for terminal");
2108
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002109 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002110}
Daniel Verkamp5586ff52022-02-24 16:34:55 -08002111
#[cfg(test)]
mod tests {
    use super::*;

    // Build file-backed mapping parameters for the range starting at `address` and spanning
    // `size` bytes. All remaining fields get fixed placeholder values: an empty path, a zero
    // offset, and both `writable` and `sync` turned off.
    fn test_file_backed_mapping(address: u64, size: u64) -> FileBackedMappingParameters {
        let path = PathBuf::new();
        FileBackedMappingParameters {
            address,
            size,
            path,
            offset: 0,
            writable: false,
            sync: false,
        }
    }

    // Checks `punch_holes_in_guest_mem_layout_for_mappings` against a fixed two-region guest
    // memory layout, one file-backed mapping placement per assertion.
    #[test]
    fn guest_mem_file_backed_mappings_overlap() {
        // Input layout shared by every case: a low region of 0xD000_0000 bytes at address 0 and
        // a high region of 0x8_0000 bytes at address 0x1_0000_0000.
        let layout = || {
            vec![
                (GuestAddress(0), 0xD000_0000),
                (GuestAddress(0x1_0000_0000), 0x8_0000),
            ]
        };

        // Run the function under test on a fresh copy of the shared layout.
        let punch = |mappings: &[FileBackedMappingParameters]| {
            punch_holes_in_guest_mem_layout_for_mappings(layout(), mappings)
        };

        // Base case: no file mappings; output layout should be identical.
        assert_eq!(punch(&[]), layout());

        // File mapping that does not overlap guest memory.
        assert_eq!(
            punch(&[test_file_backed_mapping(0xD000_0000, 0x1000)]),
            layout()
        );

        // File mapping at the start of the low address space region.
        let expected = vec![
            (GuestAddress(0x2000), 0xD000_0000 - 0x2000),
            (GuestAddress(0x1_0000_0000), 0x8_0000),
        ];
        assert_eq!(punch(&[test_file_backed_mapping(0, 0x2000)]), expected);

        // File mapping at the end of the low address space region.
        let expected = vec![
            (GuestAddress(0), 0xD000_0000 - 0x2000),
            (GuestAddress(0x1_0000_0000), 0x8_0000),
        ];
        assert_eq!(
            punch(&[test_file_backed_mapping(0xD000_0000 - 0x2000, 0x2000)]),
            expected
        );

        // File mapping fully contained within the middle of the low address space region.
        let expected = vec![
            (GuestAddress(0), 0x1000),
            (GuestAddress(0x3000), 0xD000_0000 - 0x3000),
            (GuestAddress(0x1_0000_0000), 0x8_0000),
        ];
        assert_eq!(punch(&[test_file_backed_mapping(0x1000, 0x2000)]), expected);

        // File mapping at the start of the high address space region.
        let expected = vec![
            (GuestAddress(0), 0xD000_0000),
            (GuestAddress(0x1_0000_2000), 0x8_0000 - 0x2000),
        ];
        assert_eq!(
            punch(&[test_file_backed_mapping(0x1_0000_0000, 0x2000)]),
            expected
        );

        // File mapping at the end of the high address space region.
        let expected = vec![
            (GuestAddress(0), 0xD000_0000),
            (GuestAddress(0x1_0000_0000), 0x8_0000 - 0x2000),
        ];
        assert_eq!(
            punch(&[test_file_backed_mapping(0x1_0008_0000 - 0x2000, 0x2000)]),
            expected
        );

        // File mapping fully contained within the middle of the high address space region.
        let expected = vec![
            (GuestAddress(0), 0xD000_0000),
            (GuestAddress(0x1_0000_0000), 0x1000),
            (GuestAddress(0x1_0000_3000), 0x8_0000 - 0x3000),
        ];
        assert_eq!(
            punch(&[test_file_backed_mapping(0x1_0000_1000, 0x2000)]),
            expected
        );

        // File mapping overlapping two guest memory regions: it starts inside the low region and
        // ends 0x2000 bytes into the high region.
        let expected = vec![
            (GuestAddress(0), 0xA000_0000),
            (GuestAddress(0x1_0000_2000), 0x8_0000 - 0x2000),
        ];
        assert_eq!(
            punch(&[test_file_backed_mapping(0xA000_0000, 0x6000_2000)]),
            expected
        );
    }
}
2268}