blob: d170ee9b15f44ca065fb690bb40dfd128e5f6aa5 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Anton Romanov5acc0f52022-01-28 00:18:11 +00006use std::collections::BTreeMap;
7use std::convert::TryInto;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::fs::{File, OpenOptions};
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00009use std::io::prelude::*;
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090010use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070011use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070012use std::mem;
Haiwei Li09b7b8e2022-02-18 18:16:05 +080013use std::ops::RangeInclusive;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000014#[cfg(feature = "gpu")]
15use std::os::unix::net::UnixStream;
16use std::os::unix::prelude::OpenOptionsExt;
Xiong Zhangf7874712021-12-24 10:53:59 +080017use std::path::{Path, PathBuf};
Dylan Reidb0492662019-05-17 14:50:13 -070018use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090019use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070020
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000021use std::process;
Anton Romanov5acc0f52022-01-28 00:18:11 +000022#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reizner39aa26b2017-12-12 18:03:23 -080023use std::thread;
Zach Reizner39aa26b2017-12-12 18:03:23 -080024
Anton Romanov5acc0f52022-01-28 00:18:11 +000025use libc;
Zach Reizner39aa26b2017-12-12 18:03:23 -080026
Tomasz Jeznach42644642020-05-20 23:27:59 -070027use acpi_tables::sdt::SDT;
28
Daniel Verkamp6b298582021-08-16 15:37:11 -070029use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090030use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080031use base::*;
Anton Romanov5acc0f52022-01-28 00:18:11 +000032use devices::serial_device::SerialHardware;
Zide Chenafdb9382021-06-17 12:04:43 -070033use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Woody Chow055b81b2022-01-25 18:34:29 +090034use devices::virtio::memory_mapper::MemoryMapperTrait;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000035#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000036use devices::virtio::{self, EventDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080037#[cfg(feature = "audio")]
38use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080039use devices::{
Anton Romanov5acc0f52022-01-28 00:18:11 +000040 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqEventIndex, KvmKernelIrqChip, PciAddress,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +000041 PciBridge, PciDevice, PcieHostRootPort, PcieRootPort, PvPanicCode, PvPanicPciDevice,
Woody Chow055b81b2022-01-25 18:34:29 +090042 StubPciDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080043};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080044use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070045#[cfg(feature = "usb")]
46use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070047use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Anton Romanov5acc0f52022-01-28 00:18:11 +000048use hypervisor::{HypervisorCap, ProtectionType, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070049use minijail::{self, Minijail};
Anton Romanov5acc0f52022-01-28 00:18:11 +000050use resources::{Alloc, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080051use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070052use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080053use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090054use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080055
Keiichi Watanabec5262e92020-10-21 15:57:33 +090056#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
57use crate::gdb::{gdb_thread, GdbStub};
Anton Romanovd43ae3c2022-01-31 17:32:54 +000058use crate::{Config, Executable, SharedDir, SharedDirKind, VfioType};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070059use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090060 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070061};
Sonny Raoed517d12018-02-13 22:09:43 -080062
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080063#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070064use {
65 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070066 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070067 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
68};
Zach Reizner55a9e502018-10-03 10:22:32 -070069#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070070use {
Steven Richman11dc6712020-09-02 15:39:14 -070071 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
72 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070073 x86_64::X8664arch as Arch,
74};
Zach Reizner39aa26b2017-12-12 18:03:23 -080075
Anton Romanov5acc0f52022-01-28 00:18:11 +000076mod device_helpers;
77use device_helpers::*;
78mod jail_helpers;
79use jail_helpers::*;
80mod vcpu;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090081
David Tolnay2b089fc2019-03-04 15:33:22 -080082#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000083mod gpu;
Chirantan Ekbote44292f52021-06-25 18:31:41 +090084#[cfg(feature = "gpu")]
Dmitry Torokhove464a7a2022-01-26 13:29:36 -080085pub use gpu::GpuRenderServerParameters;
86#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000087use gpu::*;
Jorge E. Moreirad4562d02021-06-28 16:21:12 -070088
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080089// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -070090#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -080091fn create_virtio_devices(
92 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -070093 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -070094 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -070095 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080096 wayland_device_tube: Tube,
97 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +090098 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +000099 balloon_device_tube: Option<Tube>,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800100 balloon_inflate_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900101 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800102 disk_device_tubes: &mut Vec<Tube>,
103 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800104 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800105 fs_device_tubes: &mut Vec<Tube>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800106 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800107 vvu_proxy_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800108) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700109 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800110
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900111 #[cfg(feature = "gpu")]
112 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
113 devs.push(create_vhost_user_gpu_device(
114 cfg,
115 opt,
116 host_tube,
117 device_tube,
118 )?);
119 }
120
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700121 for opt in &cfg.vvu_proxy {
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800122 devs.push(create_vvu_proxy_device(
123 cfg,
124 opt,
125 vvu_proxy_device_tubes.remove(0),
126 )?);
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700127 }
128
David Tolnayfa701712019-02-13 16:42:54 -0800129 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800130 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900131
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900132 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900133 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800134 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900135
136 #[cfg(feature = "gpu")]
137 {
Jason Macnakcc7070b2019-11-06 14:48:12 -0800138 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700139 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900140 resource_bridges.push(gpu_socket);
141 wl_resource_bridge = Some(wl_socket);
142 }
143 }
144
145 devs.push(create_wayland_device(
146 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800147 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900148 wl_resource_bridge,
149 )?);
150 }
David Tolnayfa701712019-02-13 16:42:54 -0800151
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900152 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900153 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700154 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700155 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900156 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700157 } else {
158 None
159 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900160
161 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900162 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700163 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700164 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900165 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700166 } else {
167 None
168 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900169
Zach Reizner3a8100a2017-09-13 19:15:43 -0700170 #[cfg(feature = "gpu")]
171 {
Noah Golddc7f52b2020-02-01 13:01:58 -0800172 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Anton Romanov5acc0f52022-01-28 00:18:11 +0000173 let mut gpu_display_w = virtio::DEFAULT_DISPLAY_WIDTH;
174 let mut gpu_display_h = virtio::DEFAULT_DISPLAY_HEIGHT;
Jason Macnakd659a0d2021-03-15 15:33:01 -0700175 if !gpu_parameters.displays.is_empty() {
176 gpu_display_w = gpu_parameters.displays[0].width;
177 gpu_display_h = gpu_parameters.displays[0].height;
178 }
179
Zach Reizner65b98f12019-11-22 17:34:58 -0800180 let mut event_devices = Vec::new();
181 if cfg.display_window_mouse {
182 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700183 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000184 let (multi_touch_width, multi_touch_height) = cfg
185 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700186 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800187 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000188 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -0700189 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000190 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700191 // u32::MAX is the least likely to collide with the indices generated above for
192 // the multi_touch options, which begin at 0.
193 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800194 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000195 multi_touch_width,
196 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700197 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800198 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700199 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800200 devs.push(VirtioDeviceStub {
201 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700202 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800203 });
204 event_devices.push(EventDevice::touchscreen(event_device_socket));
205 }
206 if cfg.display_window_keyboard {
207 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700208 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -0700209 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700210 // u32::MAX is the least likely to collide with the indices generated above for
211 // the multi_touch options, which begin at 0.
212 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700213 virtio_dev_socket,
214 virtio::base_features(cfg.protected_vm),
215 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700216 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800217 devs.push(VirtioDeviceStub {
218 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700219 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800220 });
221 event_devices.push(EventDevice::keyboard(event_device_socket));
222 }
Chia-I Wu16fb6592021-11-10 11:45:32 -0800223
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700224 devs.push(create_gpu_device(
225 cfg,
226 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800227 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700228 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900229 // Use the unnamed socket for GPU display screens.
230 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700231 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -0800232 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -0800233 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800234 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700235 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -0700236 }
237 }
238
Richard Fung08289b12022-02-02 20:46:19 +0000239 for (_, param) in cfg
240 .serial_parameters
241 .iter()
242 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
243 {
244 let dev = create_console_device(cfg, param)?;
245 devs.push(dev);
246 }
247
248 for disk in &cfg.disks {
249 let disk_device_tube = disk_device_tubes.remove(0);
250 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
251 }
252
253 for blk in &cfg.vhost_user_blk {
254 devs.push(create_vhost_user_block_device(cfg, blk)?);
255 }
256
257 for console in &cfg.vhost_user_console {
258 devs.push(create_vhost_user_console_device(cfg, console)?);
259 }
260
261 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
262 let pmem_device_tube = pmem_device_tubes.remove(0);
263 devs.push(create_pmem_device(
264 cfg,
265 vm,
266 resources,
267 pmem_disk,
268 index,
269 pmem_device_tube,
270 )?);
271 }
272
Andrew Walbrana24a7522022-02-09 18:23:00 +0000273 if cfg.rng {
274 devs.push(create_rng_device(cfg)?);
275 }
Richard Fung08289b12022-02-02 20:46:19 +0000276
277 #[cfg(feature = "tpm")]
278 {
279 if cfg.software_tpm {
280 devs.push(create_tpm_device(cfg)?);
281 }
282 }
283
284 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
285 devs.push(create_single_touch_device(
286 cfg,
287 single_touch_spec,
288 idx as u32,
289 )?);
290 }
291
292 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
293 devs.push(create_multi_touch_device(
294 cfg,
295 multi_touch_spec,
296 idx as u32,
297 )?);
298 }
299
300 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
301 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
302 }
303
304 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
305 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
306 }
307
308 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
309 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
310 }
311
312 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
313 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
314 }
315
316 for dev_path in &cfg.virtio_input_evdevs {
317 devs.push(create_vinput_device(cfg, dev_path)?);
318 }
319
320 if let Some(balloon_device_tube) = balloon_device_tube {
321 devs.push(create_balloon_device(
322 cfg,
323 balloon_device_tube,
324 balloon_inflate_tube,
325 init_balloon_size,
326 )?);
327 }
328
329 // We checked above that if the IP is defined, then the netmask is, too.
330 for tap_fd in &cfg.tap_fd {
331 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
332 }
333
334 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
335 (cfg.host_ip, cfg.netmask, cfg.mac_address)
336 {
337 if !cfg.vhost_user_net.is_empty() {
338 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
339 }
340 devs.push(create_net_device_from_config(
341 cfg,
342 host_ip,
343 netmask,
344 mac_address,
345 )?);
346 }
347
348 for tap_name in &cfg.tap_name {
349 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
350 }
351
352 for net in &cfg.vhost_user_net {
353 devs.push(create_vhost_user_net_device(cfg, net)?);
354 }
355
356 for vsock in &cfg.vhost_user_vsock {
357 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
358 }
359
360 for opt in &cfg.vhost_user_wl {
361 devs.push(create_vhost_user_wl_device(cfg, opt)?);
362 }
363
Chih-Yang Hsiae31731c2022-01-05 17:30:28 +0800364 #[cfg(feature = "audio_cras")]
365 {
366 for cras_snd in &cfg.cras_snds {
367 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
368 }
369 }
370
Daniel Verkampffb59122021-03-18 14:06:15 -0700371 #[cfg(feature = "video-decoder")]
372 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900373 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700374 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900375 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700376 &mut devs,
377 video_dec_tube,
378 cfg,
379 devices::virtio::VideoDeviceType::Decoder,
380 )?;
381 }
382 }
383
384 #[cfg(feature = "video-encoder")]
385 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900386 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700387 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900388 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700389 &mut devs,
390 video_enc_tube,
391 cfg,
392 devices::virtio::VideoDeviceType::Encoder,
393 )?;
394 }
395 }
396
Zach Reizneraa575662018-08-15 10:46:32 -0700397 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +0900398 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700399 }
400
Woody Chow5890b702021-02-12 14:57:02 +0900401 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700402 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +0900403 }
404
Woody Chow1b16db12021-04-02 16:59:59 +0900405 #[cfg(feature = "audio")]
406 for vhost_user_snd in &cfg.vhost_user_snd {
407 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
408 }
409
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900410 for shared_dir in &cfg.shared_dirs {
411 let SharedDir {
412 src,
413 tag,
414 kind,
415 uid_map,
416 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900417 fs_cfg,
418 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900419 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -0800420
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900421 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900422 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800423 let device_tube = fs_device_tubes.remove(0);
424 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900425 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900426 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900427 };
428 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -0800429 }
430
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900431 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
432 devs.push(create_vhost_user_mac80211_hwsim_device(
433 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700434 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900435 )?);
436 }
437
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700438 #[cfg(feature = "audio")]
439 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700440 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700441 }
442
David Tolnay2b089fc2019-03-04 15:33:22 -0800443 Ok(devs)
444}
445
446fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -0600447 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -0700448 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -0700449 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -0700450 exit_evt: &Event,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +0000451 panic_wrtube: Tube,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800452 iommu_attached_endpoints: &mut BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800453 control_tubes: &mut Vec<TaggedControlTube>,
454 wayland_device_tube: Tube,
455 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900456 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000457 balloon_device_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900458 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800459 disk_device_tubes: &mut Vec<Tube>,
460 pmem_device_tubes: &mut Vec<Tube>,
461 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -0700462 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800463 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800464 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800465 vvu_proxy_device_tubes: &mut Vec<Tube>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000466) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800467 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
468 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -0700469 if !cfg.vfio.is_empty() {
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800470 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -0800471
Tomasz Nowicki71aca792021-06-09 18:53:49 +0000472 for vfio_dev in cfg
473 .vfio
474 .iter()
475 .filter(|dev| dev.get_type() == VfioType::Pci)
476 {
477 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -0700478 let (vfio_pci_device, jail) = create_vfio_device(
479 cfg,
480 vm,
481 resources,
482 control_tubes,
483 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +0800484 None,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800485 iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800486 Some(&mut coiommu_attached_endpoints),
487 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -0700488 )?;
Zide Chendfc4b882021-03-10 16:35:37 -0800489
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000490 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -0700491 }
Zide Chendfc4b882021-03-10 16:35:37 -0800492
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000493 for vfio_dev in cfg
494 .vfio
495 .iter()
496 .filter(|dev| dev.get_type() == VfioType::Platform)
497 {
498 let vfio_path = &vfio_dev.vfio_path;
499 let (vfio_plat_dev, jail) = create_vfio_platform_device(
500 cfg,
501 vm,
502 resources,
503 control_tubes,
504 vfio_path.as_path(),
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800505 iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800506 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000507 )?;
508
509 devices.push((Box::new(vfio_plat_dev), jail));
510 }
511
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +0800512 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
513 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
514 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
515 if res == 0 {
516 let limit = unsafe { buf.assume_init() };
517 let rlim_new = limit
518 .rlim_cur
519 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
520 let rlim_max = max(limit.rlim_max, rlim_new);
521 if limit.rlim_cur < rlim_new {
522 let limit_arg = libc::rlimit {
523 rlim_cur: rlim_new as libc::rlim_t,
524 rlim_max: rlim_max as libc::rlim_t,
525 };
526 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
527 if res != 0 {
528 bail!("Set rlimit failed");
529 }
530 }
531 } else {
532 bail!("Get rlimit failed");
533 }
534 }
535
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800536 if !coiommu_attached_endpoints.is_empty() {
537 let vfio_container =
538 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
539 .context("failed to get vfio container")?;
540 let (coiommu_host_tube, coiommu_device_tube) =
541 Tube::pair().context("failed to create coiommu tube")?;
542 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
543 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800544 let (coiommu_tube, balloon_tube) =
545 Tube::pair().context("failed to create coiommu tube")?;
546 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800547 let dev = CoIommuDev::new(
548 vm.get_memory().clone(),
549 vfio_container,
550 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800551 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800552 coiommu_attached_endpoints,
553 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +0800554 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800555 )
556 .context("failed to create coiommu device")?;
557
558 devices.push((Box::new(dev), simple_jail(cfg, "coiommu")?));
559 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800560 }
561
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800562 let stubs = create_virtio_devices(
563 cfg,
564 vm,
565 resources,
566 exit_evt,
567 wayland_device_tube,
568 gpu_device_tube,
569 vhost_user_gpu_tubes,
570 balloon_device_tube,
571 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +0900572 init_balloon_size,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800573 disk_device_tubes,
574 pmem_device_tubes,
575 map_request,
576 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800577 #[cfg(feature = "gpu")]
578 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800579 vvu_proxy_device_tubes,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800580 )?;
581
582 for stub in stubs {
583 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
584 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
585 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
586 .context("failed to create virtio pci dev")?;
587 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
588 devices.push((dev, stub.jail));
589 }
590
591 #[cfg(feature = "audio")]
592 for ac97_param in &cfg.ac97_parameters {
593 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
594 .context("failed to create ac97 device")?;
595 let jail = simple_jail(cfg, dev.minijail_policy())?;
596 devices.push((Box::new(dev), jail));
597 }
598
599 #[cfg(feature = "usb")]
Sebastian Ene0440d352022-02-04 12:23:56 +0000600 if cfg.usb {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800601 // Create xhci controller.
602 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
603 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
604 }
605
Mattias Nisslerde2c6402021-10-21 12:05:29 +0000606 for params in &cfg.stub_pci_devices {
607 // Stub devices don't need jailing since they don't do anything.
608 devices.push((Box::new(StubPciDevice::new(params)), None));
609 }
610
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +0000611 devices.push((Box::new(PvPanicPciDevice::new(panic_wrtube)), None));
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000612 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -0800613}
614
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000615fn create_file_backed_mappings(
616 cfg: &Config,
617 vm: &mut impl Vm,
618 resources: &mut SystemAllocator,
619) -> Result<()> {
620 for mapping in &cfg.file_backed_mappings {
621 let file = OpenOptions::new()
622 .read(true)
623 .write(mapping.writable)
624 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
625 .open(&mapping.path)
626 .context("failed to open file for file-backed mapping")?;
627 let prot = if mapping.writable {
628 Protection::read_write()
629 } else {
630 Protection::read()
631 };
632 let size = mapping
633 .size
634 .try_into()
635 .context("Invalid size for file-backed mapping")?;
636 let memory_mapping = MemoryMappingBuilder::new(size)
637 .from_file(&file)
638 .offset(mapping.offset)
639 .protection(prot)
640 .build()
641 .context("failed to map backing file for file-backed mapping")?;
642
643 resources
644 .mmio_allocator_any()
645 .allocate_at(
646 mapping.address,
647 mapping.size,
648 Alloc::FileBacked(mapping.address),
649 "file-backed mapping".to_owned(),
650 )
651 .context("failed to allocate guest address for file-backed mapping")?;
652
653 vm.add_memory_region(
654 GuestAddress(mapping.address),
655 Box::new(memory_mapping),
656 !mapping.writable,
657 /* log_dirty_pages = */ false,
658 )
659 .context("failed to configure file-backed mapping")?;
660 }
661
662 Ok(())
663}
664
Xiong Zhangf7874712021-12-24 10:53:59 +0800665fn create_pcie_root_port(
666 host_pcie_rp: Vec<PathBuf>,
667 sys_allocator: &mut SystemAllocator,
668 control_tubes: &mut Vec<TaggedControlTube>,
669 devices: &mut Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
670 hp_vec: &mut Vec<Arc<Mutex<dyn HotPlugBus>>>,
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800671 hp_endpoints_ranges: &mut Vec<RangeInclusive<u32>>,
Xiong Zhangf7874712021-12-24 10:53:59 +0800672) -> Result<()> {
673 if host_pcie_rp.is_empty() {
674 // user doesn't specify host pcie root port which link to this virtual pcie rp,
675 // find the empty bus and create a total virtual pcie rp
Haiwei Lie35d4652022-02-10 15:39:33 +0800676 let mut hp_sec_bus = 0u8;
677 // Create Pcie Root Port for non-root buses, each non-root bus device will be
678 // connected behind a virtual pcie root port.
679 for i in 1..255 {
680 if sys_allocator.pci_bus_empty(i) {
681 if hp_sec_bus == 0 {
682 hp_sec_bus = i;
683 }
684 continue;
685 }
686 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(i, false)));
687 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
688 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
689 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
690 // no ipc is used if the root port disables hotplug
691 devices.push((pci_bridge, None));
692 }
693
694 // Create Pcie Root Port for hot-plug
695 if hp_sec_bus == 0 {
696 return Err(anyhow!("no more addresses are available"));
697 }
698 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(hp_sec_bus, true)));
Xiong Zhangf7874712021-12-24 10:53:59 +0800699 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
700 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
701 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
702
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800703 hp_endpoints_ranges.push(RangeInclusive::new(
704 PciAddress {
705 bus: pci_bridge.get_secondary_num(),
706 dev: 0,
707 func: 0,
708 }
709 .to_u32(),
710 PciAddress {
711 bus: pci_bridge.get_subordinate_num(),
712 dev: 32,
713 func: 8,
714 }
715 .to_u32(),
716 ));
717
Xiong Zhangf7874712021-12-24 10:53:59 +0800718 devices.push((pci_bridge, None));
719 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
720 } else {
721 // user specify host pcie root port which link to this virtual pcie rp,
722 // reserve the host pci BDF and create a virtual pcie RP with some attrs same as host
723 for pcie_sysfs in host_pcie_rp.iter() {
Xiong Zhangd6de3192022-02-16 13:24:06 +0800724 let pcie_host = PcieHostRootPort::new(pcie_sysfs.as_path())?;
725 let bus_range = pcie_host.get_bus_range();
726 let mut slot_implemented = true;
727 for i in bus_range.secondary..=bus_range.subordinate {
728 // if this bus is occupied by one vfio-pci device, this vfio-pci device is
729 // connected to a pci bridge on host statically, then it should be connected
730 // to a virtual pci bridge in guest statically, this bridge won't have
731 // hotplug capability and won't use slot.
732 if !sys_allocator.pci_bus_empty(i) {
733 slot_implemented = false;
734 }
735 }
736 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new_from_host(
737 pcie_host,
738 slot_implemented,
739 )?));
Xiong Zhangf7874712021-12-24 10:53:59 +0800740
741 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
742 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
743 let mut pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
744 // early reservation for host pcie root port devices.
745 let rootport_addr = pci_bridge.allocate_address(sys_allocator);
746 if rootport_addr.is_err() {
747 warn!(
748 "address reservation failed for hot pcie root port {}",
749 pci_bridge.debug_label()
750 );
751 }
752
Haiwei Li09b7b8e2022-02-18 18:16:05 +0800753 hp_endpoints_ranges.push(RangeInclusive::new(
754 PciAddress {
755 bus: pci_bridge.get_secondary_num(),
756 dev: 0,
757 func: 0,
758 }
759 .to_u32(),
760 PciAddress {
761 bus: pci_bridge.get_subordinate_num(),
762 dev: 32,
763 func: 8,
764 }
765 .to_u32(),
766 ));
767
Xiong Zhangf7874712021-12-24 10:53:59 +0800768 devices.push((pci_bridge, None));
769 hp_vec.push(pcie_root_port as Arc<Mutex<dyn HotPlugBus>>);
770 }
771 }
772
773 Ok(())
774}
775
Zach Reiznera90649a2021-03-31 12:56:08 -0700776fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800777 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100778 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900779 open_file(
780 initrd_path,
781 true, /*read_only*/
782 false, /*O_DIRECT*/
783 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700784 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100785 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800786 } else {
787 None
788 };
789
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700790 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100791 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900792 open_file(
793 kernel_path,
794 true, /*read_only*/
795 false, /*O_DIRECT*/
796 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700797 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100798 ),
799 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900800 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700801 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100802 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700803 _ => panic!("Did not receive a bios or kernel, should be impossible."),
804 };
805
Will Deaconc48e7832021-07-30 19:03:06 +0100806 let swiotlb = if let Some(size) = cfg.swiotlb {
807 Some(
808 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700809 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100810 )
811 } else {
812 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +0000813 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
814 Some(64 * 1024 * 1024)
815 }
Will Deaconc48e7832021-07-30 19:03:06 +0100816 ProtectionType::Unprotected => None,
817 }
818 };
819
Zach Reiznera90649a2021-03-31 12:56:08 -0700820 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -0800821 memory_size: cfg
822 .memory
823 .unwrap_or(256)
824 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700825 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100826 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -0700827 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -0700828 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -0700829 cpu_clusters: cfg.cpu_clusters.clone(),
830 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +0900831 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +0900832 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700833 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800834 android_fstab: cfg
835 .android_fstab
836 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700837 .map(|x| {
838 File::open(x)
839 .with_context(|| format!("failed to open android fstab file {}", x.display()))
840 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800841 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +0900842 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800843 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -0700844 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700845 acpi_sdts: cfg
846 .acpi_tables
847 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700848 .map(|path| {
849 SDT::from_file(path)
850 .with_context(|| format!("failed to open ACPI file {}", path.display()))
851 })
Tomasz Jeznach42644642020-05-20 23:27:59 -0700852 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +0900853 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +0900854 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100855 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900856 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -0700857 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700858 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -0700859 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +0800860 host_cpu_topology: cfg.host_cpu_topology,
Grzegorz Jaszczykd33874e2022-02-11 18:27:29 +0000861 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
862 force_s2idle: cfg.force_s2idle,
Zach Reiznera90649a2021-03-31 12:56:08 -0700863 })
864}
865
/// Reason the VM stopped running, as reported by `run_config` on success.
///
/// NOTE(review): the conditions that select each variant are decided by code outside this
/// section; the per-variant descriptions below are inferred from the names — confirm.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExitState {
    /// Presumably: the guest requested a reset.
    Reset,
    /// Presumably: the VM was stopped normally.
    Stop,
    /// Presumably: the VM crashed.
    Crash,
    /// Presumably: the guest kernel reported a panic.
    GuestPanic,
}
873
874pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -0700875 let components = setup_vm_components(&cfg)?;
876
877 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700878 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
879 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700880 let mut mem_policy = MemoryPolicy::empty();
881 if components.hugepages {
882 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
883 }
Quentin Perret26203802021-12-02 09:48:43 +0000884 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700885 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +0000886 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Andrew Walbrane79aba12022-01-27 14:12:35 +0000887 // Check that the VM was actually created in protected mode as expected.
888 if cfg.protected_vm != ProtectionType::Unprotected && !vm.check_capability(VmCap::Protected) {
889 bail!("Failed to create protected VM");
890 }
Daniel Verkamp6b298582021-08-16 15:37:11 -0700891 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700892
893 enum KvmIrqChip {
894 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
895 Split(KvmSplitIrqChip),
896 Kernel(KvmKernelIrqChip),
897 }
898
899 impl KvmIrqChip {
900 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
901 match self {
902 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
903 KvmIrqChip::Split(i) => i,
904 KvmIrqChip::Kernel(i) => i,
905 }
906 }
907 }
908
909 let ioapic_host_tube;
910 let mut irq_chip = if cfg.split_irqchip {
911 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
912 unimplemented!("KVM split irqchip mode only supported on x86 processors");
913 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
914 {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700915 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700916 ioapic_host_tube = Some(host_tube);
917 KvmIrqChip::Split(
918 KvmSplitIrqChip::new(
919 vm_clone,
920 components.vcpu_count,
921 ioapic_device_tube,
922 Some(120),
923 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700924 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700925 )
926 }
927 } else {
928 ioapic_host_tube = None;
929 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700930 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
931 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700932 )
933 };
934
935 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
936}
937
938fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -0700939 cfg: Config,
940 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -0700941 mut vm: V,
942 irq_chip: &mut dyn IrqChipArch,
943 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800944) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -0700945where
946 Vcpu: VcpuArch + 'static,
947 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -0700948{
949 if cfg.sandbox {
950 // Printing something to the syslog before entering minijail so that libc's syslogger has a
951 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
952 // access to those files will not be possible.
953 info!("crosvm entering multiprocess mode");
954 }
955
Daniel Verkampf1439d42021-05-21 13:55:10 -0700956 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -0700957 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700958 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -0700959
Zach Reiznera90649a2021-03-31 12:56:08 -0700960 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
961 // before any jailed devices have been spawned, so that we can catch any of them that fail very
962 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700963 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -0700964
Zach Reiznera60744b2019-02-13 17:33:32 -0800965 let control_server_socket = match &cfg.socket_path {
966 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700967 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -0800968 )),
969 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -0700970 };
Zach Reiznera60744b2019-02-13 17:33:32 -0800971
Zach Reiznera90649a2021-03-31 12:56:08 -0700972 let mut control_tubes = Vec::new();
973
974 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
975 if let Some(port) = cfg.gdb {
976 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700977 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -0700978 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
979 components.gdb = Some((port, gdb_control_tube));
980 }
981
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900982 for wl_cfg in &cfg.vhost_user_wl {
983 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
984 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700985 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900986 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
987 }
988
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900989 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
990 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700991 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900992 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -0700993 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900994 device_tube,
995 ));
996 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
997 }
998
Daniel Verkamp6b298582021-08-16 15:37:11 -0700999 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001000 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Andrew Walbran3cd93602022-01-25 13:59:23 +00001001
1002 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
David Stevens8be9ef02022-01-13 22:50:24 +09001003 if let Some(ref path) = cfg.balloon_control {
1004 (
1005 None,
1006 Some(Tube::new(
1007 UnixSeqpacket::connect(path).context("failed to create balloon control")?,
1008 )),
1009 )
1010 } else {
1011 // Balloon gets a special socket so balloon requests can be forwarded
1012 // from the main process.
1013 let (host, device) = Tube::pair().context("failed to create tube")?;
1014 // Set recv timeout to avoid deadlock on sending BalloonControlCommand
1015 // before the guest is ready.
1016 host.set_recv_timeout(Some(Duration::from_millis(100)))
1017 .context("failed to set timeout")?;
1018 (Some(host), Some(device))
1019 }
Andrew Walbran3cd93602022-01-25 13:59:23 +00001020 } else {
1021 (None, None)
1022 };
Dylan Reid059a1882018-07-23 17:58:09 -07001023
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001024 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001025 let mut disk_device_tubes = Vec::new();
1026 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001027 let disk_count = cfg.disks.len();
1028 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001029 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001030 disk_host_tubes.push(disk_host_tub);
1031 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001032 }
1033
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001034 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08001035 let pmem_count = cfg.pmem_devices.len();
1036 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001037 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001038 pmem_device_tubes.push(pmem_device_tube);
1039 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08001040 }
1041
Daniel Verkamp6b298582021-08-16 15:37:11 -07001042 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001043 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001044
Zach Reiznerdc748482021-04-14 13:59:30 -07001045 if let Some(ioapic_host_tube) = ioapic_host_tube {
1046 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
1047 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08001048
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001049 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -07001050 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +09001051 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -07001052 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +09001053 Some(mut jail) => {
1054 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
1055 #[cfg(feature = "power-monitor-powerd")]
1056 {
Fergus Dall51200512021-08-19 12:54:26 +10001057 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +09001058
1059 // Create a tmpfs in the device's root directory so that we can bind mount files.
1060 jail.mount_with_data(
1061 Path::new("none"),
1062 Path::new("/"),
1063 "tmpfs",
1064 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1065 "size=67108864",
1066 )?;
1067
1068 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
1069 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
1070 }
1071 Some(jail)
1072 }
1073 None => None,
1074 };
1075 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001076 } else {
1077 (&cfg.battery_type, None)
1078 };
1079
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001080 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
1081
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001082 let fs_count = cfg
1083 .shared_dirs
1084 .iter()
1085 .filter(|sd| sd.kind == SharedDirKind::FS)
1086 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001087 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001088 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001089 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001090 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
1091 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001092 }
1093
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001094 let mut vvu_proxy_device_tubes = Vec::new();
1095 for _ in 0..cfg.vvu_proxy.len() {
1096 let (vvu_proxy_host_tube, vvu_proxy_device_tube) =
1097 Tube::pair().context("failed to create VVU proxy tube")?;
1098 control_tubes.push(TaggedControlTube::VmMemory(vvu_proxy_host_tube));
1099 vvu_proxy_device_tubes.push(vvu_proxy_device_tube);
1100 }
1101
Daniel Verkamp6b298582021-08-16 15:37:11 -07001102 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001103 let reset_evt = Event::new().context("failed to create event")?;
Andrew Walbran1a19c672022-01-24 17:24:10 +00001104 let crash_evt = Event::new().context("failed to create event")?;
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001105 let (panic_rdtube, panic_wrtube) = Tube::pair().context("failed to create tube")?;
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08001106 let mut sys_allocator = Arch::create_system_allocator(&vm);
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001107
1108 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
1109 let ramoops_region = match &components.pstore {
1110 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07001111 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001112 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001113 ),
1114 None => None,
1115 };
1116
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00001117 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
1118
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001119 #[cfg(feature = "gpu")]
1120 // Hold on to the render server jail so it keeps running until we exit run_vm()
Dmitry Torokhove464a7a2022-01-26 13:29:36 -08001121 let (_render_server_jail, render_server_fd) =
1122 if let Some(parameters) = &cfg.gpu_render_server_parameters {
1123 let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
1124 (Some(ScopedMinijail(jail)), Some(fd))
1125 } else {
1126 (None, None)
1127 };
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001128
David Stevens06d157a2022-01-13 23:44:48 +09001129 let init_balloon_size = components
1130 .memory_size
1131 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1132 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1133 }))
1134 .context("failed to calculate init balloon size")?;
1135
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001136 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> =
1137 BTreeMap::new();
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001138 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07001139 &cfg,
1140 &mut vm,
1141 &mut sys_allocator,
1142 &exit_evt,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001143 panic_wrtube,
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001144 &mut iommu_attached_endpoints,
Zach Reiznerdc748482021-04-14 13:59:30 -07001145 &mut control_tubes,
1146 wayland_device_tube,
1147 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001148 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001149 balloon_device_tube,
David Stevens06d157a2022-01-13 23:44:48 +09001150 init_balloon_size,
Zach Reiznerdc748482021-04-14 13:59:30 -07001151 &mut disk_device_tubes,
1152 &mut pmem_device_tubes,
1153 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001154 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07001155 usb_provider,
1156 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001157 #[cfg(feature = "gpu")]
1158 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001159 &mut vvu_proxy_device_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001160 )?;
1161
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001162 let mut hp_endpoints_ranges: Vec<RangeInclusive<u32>> = Vec::new();
1163
Xiong Zhangf7874712021-12-24 10:53:59 +08001164 let mut hotplug_buses: Vec<Arc<Mutex<dyn HotPlugBus>>> = Vec::new();
1165 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1166 {
1167 #[cfg(feature = "direct")]
1168 let rp_host = cfg.pcie_rp.clone();
1169 #[cfg(not(feature = "direct"))]
1170 let rp_host: Vec<PathBuf> = Vec::new();
1171
1172 // Create Pcie Root Port
1173 create_pcie_root_port(
1174 rp_host,
1175 &mut sys_allocator,
1176 &mut control_tubes,
1177 &mut devices,
1178 &mut hotplug_buses,
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001179 &mut hp_endpoints_ranges,
Xiong Zhangf7874712021-12-24 10:53:59 +08001180 )?;
1181 }
1182
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001183 let (translate_response_senders, request_rx) = setup_virtio_access_platform(
1184 &mut sys_allocator,
1185 &mut iommu_attached_endpoints,
1186 &mut devices,
1187 )?;
1188
Haiwei Li87bc2fc2022-02-18 14:37:40 +08001189 let iommu_host_tube = if !iommu_attached_endpoints.is_empty() || cfg.virtio_iommu {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001190 let (iommu_host_tube, iommu_device_tube) = Tube::pair().context("failed to create tube")?;
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001191 let iommu_dev = create_iommu_device(
1192 &cfg,
1193 (1u64 << vm.get_guest_phys_addr_bits()) - 1,
1194 iommu_attached_endpoints,
1195 hp_endpoints_ranges,
1196 translate_response_senders,
1197 request_rx,
1198 iommu_device_tube,
1199 )?;
1200
1201 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1202 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1203 let mut dev = VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
1204 .context("failed to create virtio pci dev")?;
1205 // early reservation for viommu.
1206 dev.allocate_address(&mut sys_allocator)
1207 .context("failed to allocate resources early for virtio pci dev")?;
1208 let dev = Box::new(dev);
1209 devices.push((dev, iommu_dev.jail));
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001210 Some(iommu_host_tube)
1211 } else {
1212 None
1213 };
Haiwei Li09b7b8e2022-02-18 18:16:05 +08001214
Peter Fangc2bba082021-04-19 18:40:24 -07001215 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001216 for device in devices
1217 .iter_mut()
1218 .filter_map(|(dev, _)| dev.as_pci_device_mut())
1219 {
Peter Fangc2bba082021-04-19 18:40:24 -07001220 let sdts = device
1221 .generate_acpi(components.acpi_sdts)
1222 .or_else(|| {
1223 error!("ACPI table generation error");
1224 None
1225 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001226 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07001227 components.acpi_sdts = sdts;
1228 }
1229
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001230 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
1231 let mut kvm_vcpu_ids = Vec::new();
1232
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08001233 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07001234 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06001235 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07001236 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001237 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07001238 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06001239 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001240 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001241 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07001242 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001243 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001244 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07001245 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001246 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06001247 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001248 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08001249
Daniel Verkamp1286b482021-11-30 15:14:16 -08001250 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1251 {
Xiong Zhangf7874712021-12-24 10:53:59 +08001252 for hotplug_bus in hotplug_buses.iter() {
1253 linux.hotplug_bus.push(hotplug_bus.clone());
1254 }
Daniel Verkamp1286b482021-11-30 15:14:16 -08001255 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001256
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001257 #[cfg(feature = "direct")]
1258 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001259 let direct_io = Arc::new(
1260 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
1261 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001262 for range in pmio.ranges.iter() {
1263 linux
1264 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001265 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001266 .unwrap();
1267 }
1268 };
1269
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001270 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001271 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08001272 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001273 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08001274 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001275 );
Xiong Zhang46471a02021-11-12 00:34:42 +08001276
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001277 for range in mmio.ranges.iter() {
1278 linux
1279 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001280 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001281 .unwrap();
1282 }
1283 };
1284
1285 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001286 let mut irqs = Vec::new();
1287
1288 #[cfg(feature = "direct")]
1289 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07001290 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001291 warn!("irq {} already reserved.", irq);
1292 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001293 let trigger = Event::new().context("failed to create event")?;
1294 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001295 linux
1296 .irq_chip
1297 .register_irq_event(*irq, &trigger, Some(&resample))
1298 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001299 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
1300 .context("failed to enable interrupt forwarding")?;
1301 direct_irq
1302 .irq_enable(*irq)
1303 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001304 irqs.push(direct_irq);
1305 }
1306
1307 #[cfg(feature = "direct")]
1308 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07001309 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001310 warn!("irq {} already reserved.", irq);
1311 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001312 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001313 linux
1314 .irq_chip
1315 .register_irq_event(*irq, &trigger, None)
1316 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001317 let direct_irq = devices::DirectIrq::new(trigger, None)
1318 .context("failed to enable interrupt forwarding")?;
1319 direct_irq
1320 .irq_enable(*irq)
1321 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001322 irqs.push(direct_irq);
1323 }
1324
Daniel Verkamp6b298582021-08-16 15:37:11 -07001325 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001326 run_control(
1327 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07001328 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001329 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08001330 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001331 control_tubes,
1332 balloon_host_tube,
1333 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001334 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001335 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001336 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001337 reset_evt,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001338 crash_evt,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001339 panic_rdtube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001340 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001341 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08001342 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001343 kvm_vcpu_ids,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001344 iommu_host_tube,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001345 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001346}
1347
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001348fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
1349 linux: &RunnableLinuxVm<V, Vcpu>,
1350 host_addr: PciAddress,
1351) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
1352 for hp_bus in linux.hotplug_bus.iter() {
1353 if let Some(number) = hp_bus.lock().is_match(host_addr) {
1354 return Ok((hp_bus.clone(), number));
1355 }
1356 }
1357 Err(anyhow!("Failed to find a suitable hotplug bus"))
1358}
1359
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001360fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1361 linux: &mut RunnableLinuxVm<V, Vcpu>,
1362 sys_allocator: &mut SystemAllocator,
1363 cfg: &Config,
1364 control_tubes: &mut Vec<TaggedControlTube>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001365 iommu_host_tube: &Option<Tube>,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001366 vfio_path: &Path,
1367) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001368 let host_os_str = vfio_path
1369 .file_name()
1370 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1371 let host_str = host_os_str
1372 .to_str()
1373 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001374 let host_addr =
1375 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001376
1377 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
1378
Woody Chow055b81b2022-01-25 18:34:29 +09001379 let mut endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>> = BTreeMap::new();
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001380 let (vfio_pci_device, jail) = create_vfio_device(
1381 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001382 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001383 sys_allocator,
1384 control_tubes,
1385 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001386 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001387 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001388 None,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001389 if iommu_host_tube.is_some() {
1390 IommuDevType::VirtioIommu
1391 } else {
1392 IommuDevType::NoIommu
1393 },
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001394 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001395
1396 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001397 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001398
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001399 if let Some(iommu_host_tube) = iommu_host_tube {
1400 let &endpoint_addr = endpoints.iter().next().unwrap().0;
1401 let mapper = endpoints.remove(&endpoint_addr).unwrap();
1402 if let Some(vfio_wrapper) = mapper.lock().as_vfio_wrapper() {
1403 let vfio_container = vfio_wrapper.as_vfio_container();
1404 let descriptor = vfio_container.lock().into_raw_descriptor()?;
1405 let request = VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceAdd {
1406 endpoint_addr,
1407 container: {
1408 // Safe because the descriptor is uniquely owned by `descriptor`.
1409 unsafe { File::from_raw_descriptor(descriptor) }
1410 },
1411 });
1412
1413 match virtio_iommu_request(iommu_host_tube, &request)
1414 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1415 {
1416 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1417 resp => bail!("Unexpected message response: {:?}", resp),
1418 }
1419 };
1420 }
1421
Daniel Verkamp6b298582021-08-16 15:37:11 -07001422 let host_os_str = vfio_path
1423 .file_name()
1424 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1425 let host_str = host_os_str
1426 .to_str()
1427 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001428 let host_addr =
1429 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001430 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001431 let mut hp_bus = hp_bus.lock();
1432 hp_bus.add_hotplug_device(host_key, pci_address);
1433 hp_bus.hot_plug(pci_address);
1434 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001435}
1436
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001437fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1438 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08001439 sys_allocator: &mut SystemAllocator,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001440 iommu_host_tube: &Option<Tube>,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001441 vfio_path: &Path,
1442) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001443 let host_os_str = vfio_path
1444 .file_name()
1445 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1446 let host_str = host_os_str
1447 .to_str()
1448 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Daniel Verkamp906a38f2022-02-22 13:58:53 -08001449 let host_addr =
1450 PciAddress::from_string(host_str).context("failed to parse vfio pci address")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001451 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001452 for hp_bus in linux.hotplug_bus.iter() {
1453 let mut hp_bus_lock = hp_bus.lock();
1454 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001455 if let Some(iommu_host_tube) = iommu_host_tube {
1456 let request =
1457 VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDeviceDel {
1458 endpoint_addr: pci_addr.to_u32(),
1459 });
1460 match virtio_iommu_request(iommu_host_tube, &request)
1461 .map_err(|_| VirtioIOMMUVfioError::SocketFailed)?
1462 {
1463 VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok) => (),
1464 resp => bail!("Unexpected message response: {:?}", resp),
1465 }
1466 }
1467
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001468 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08001469 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001470 return Ok(());
1471 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001472 }
1473
Daniel Verkamp6b298582021-08-16 15:37:11 -07001474 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001475}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001476
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001477fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
1478 linux: &mut RunnableLinuxVm<V, Vcpu>,
1479 sys_allocator: &mut SystemAllocator,
1480 cfg: &Config,
1481 add_tubes: &mut Vec<TaggedControlTube>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001482 iommu_host_tube: &Option<Tube>,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001483 vfio_path: &Path,
1484 add: bool,
1485) -> VmResponse {
1486 let ret = if add {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001487 add_vfio_device(
1488 linux,
1489 sys_allocator,
1490 cfg,
1491 add_tubes,
1492 iommu_host_tube,
1493 vfio_path,
1494 )
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001495 } else {
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001496 remove_vfio_device(linux, sys_allocator, iommu_host_tube, vfio_path)
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001497 };
1498
1499 match ret {
1500 Ok(()) => VmResponse::Ok,
1501 Err(e) => {
1502 error!("hanlde_vfio_command failure: {}", e);
1503 add_tubes.clear();
1504 VmResponse::Err(base::Error::new(libc::EINVAL))
1505 }
1506 }
1507}
1508
Zach Reiznerdc748482021-04-14 13:59:30 -07001509fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
1510 mut linux: RunnableLinuxVm<V, Vcpu>,
1511 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001512 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08001513 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001514 mut control_tubes: Vec<TaggedControlTube>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001515 balloon_host_tube: Option<Tube>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001516 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07001517 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001518 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001519 reset_evt: Event,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001520 crash_evt: Event,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001521 panic_rdtube: Tube,
Zach Reizner55a9e502018-10-03 10:22:32 -07001522 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001523 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08001524 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001525 kvm_vcpu_ids: Vec<usize>,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001526 iommu_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001527) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001528 #[derive(PollToken)]
1529 enum Token {
1530 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001531 Reset,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001532 Crash,
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001533 Panic,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001534 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001535 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001536 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08001537 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001538 VmControl { index: usize },
1539 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001540
Zach Reizner19ad1f32019-12-12 18:58:50 -08001541 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001542 .set_raw_mode()
1543 .expect("failed to set terminal raw mode");
1544
Michael Hoylee392c462020-10-07 03:29:24 -07001545 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07001546 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001547 (&reset_evt, Token::Reset),
Andrew Walbran1a19c672022-01-24 17:24:10 +00001548 (&crash_evt, Token::Crash),
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001549 (&panic_rdtube, Token::Panic),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001550 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07001551 (&sigchld_fd, Token::ChildSignal),
1552 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07001553 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07001554
Zach Reiznera60744b2019-02-13 17:33:32 -08001555 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07001556 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001557 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001558 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001559 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001560 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07001561 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07001562 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001563 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001564 }
1565
Steven Richmanf32d0b42020-06-20 21:45:32 -07001566 let events = linux
1567 .irq_chip
1568 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001569 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001570
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001571 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07001572 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001573 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001574 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001575 }
1576
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001577 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07001578 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001579 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07001580 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001581
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001582 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1583 // Create a channel for GDB thread.
1584 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
1585 let (s, r) = mpsc::channel();
1586 (Some(s), Some(r))
1587 } else {
1588 (None, None)
1589 };
1590
Steven Richmanf32d0b42020-06-20 21:45:32 -07001591 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
1592 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07001593 let use_hypervisor_signals = !linux
1594 .vm
1595 .get_hypervisor()
Andrew Walbran985491a2022-01-27 13:47:40 +00001596 .check_capability(HypervisorCap::ImmediateExit);
Anton Romanov5acc0f52022-01-28 00:18:11 +00001597 vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001598
Zach Reizner304e7312020-09-29 16:00:24 -07001599 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00001600 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001601 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
1602 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07001603 // Enable core scheduling before creating vCPUs so that the cookie will be
1604 // shared by all vCPU threads.
1605 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
1606 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001607 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07001608 if let Err(e) = enable_core_scheduling() {
1609 error!("Failed to enable core scheduling: {}", e);
1610 }
1611 }
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001612 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
1613 None => None,
1614 Some(cgroup_path) => {
1615 // Move main process to cgroup_path
1616 let mut f = File::create(&cgroup_path.join("tasks"))?;
1617 f.write_all(process::id().to_string().as_bytes())?;
1618 Some(f)
1619 }
1620 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07001621 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07001622 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07001623 let vcpu_affinity = match linux.vcpu_affinity.clone() {
1624 Some(VcpuAffinity::Global(v)) => v,
1625 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
1626 None => Default::default(),
1627 };
Anton Romanov5acc0f52022-01-28 00:18:11 +00001628 let handle = vcpu::run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001629 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001630 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07001631 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001632 linux.vm.try_clone().context("failed to clone vm")?,
1633 linux
1634 .irq_chip
1635 .try_box_clone()
1636 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001637 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001638 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07001639 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001640 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001641 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07001642 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001643 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07001644 (*linux.io_bus).clone(),
1645 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07001646 exit_evt.try_clone().context("failed to clone event")?,
Andrew Walbranb28ae8e2022-01-17 14:33:10 +00001647 reset_evt.try_clone().context("failed to clone event")?,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001648 crash_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001649 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07001650 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001651 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001652 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1653 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001654 cfg.per_vm_core_scheduling,
1655 cfg.host_cpu_topology,
Zide Chen344e2432022-01-28 14:58:53 -08001656 cfg.privileged_vm,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001657 match vcpu_cgroup_tasks_file {
1658 None => None,
1659 Some(ref f) => Some(
1660 f.try_clone()
1661 .context("failed to clone vcpu cgroup tasks file")?,
1662 ),
1663 },
Zach Reizner55a9e502018-10-03 10:22:32 -07001664 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07001665 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07001666 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001667
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001668 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1669 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001670 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001671 let to_vcpu_channels = vcpu_handles
1672 .iter()
1673 .map(|(_handle, channel)| channel.clone())
1674 .collect();
1675 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001676 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001677 to_vcpu_channels,
1678 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
1679 );
1680 thread::Builder::new()
1681 .name("gdb".to_owned())
1682 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001683 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001684 };
1685
Dylan Reid059a1882018-07-23 17:58:09 -07001686 vcpu_thread_barrier.wait();
1687
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001688 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09001689 let mut balloon_stats_id: u64 = 0;
1690
Michael Hoylee392c462020-10-07 03:29:24 -07001691 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001692 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07001693 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001694 Ok(v) => v,
1695 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001696 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001697 break;
1698 }
1699 }
1700 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001701
Steven Richmanf32d0b42020-06-20 21:45:32 -07001702 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
1703 warn!("can't deliver delayed irqs: {}", e);
1704 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001705
Zach Reiznera60744b2019-02-13 17:33:32 -08001706 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07001707 for event in events.iter().filter(|e| e.is_readable) {
1708 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001709 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001710 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07001711 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001712 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001713 Token::Reset => {
1714 info!("vcpu requested reset");
1715 exit_state = ExitState::Reset;
1716 break 'wait;
1717 }
Andrew Walbran1a19c672022-01-24 17:24:10 +00001718 Token::Crash => {
1719 info!("vcpu crashed");
1720 exit_state = ExitState::Crash;
1721 break 'wait;
1722 }
Vineeth Pillai9a3d2dc2022-02-18 14:10:16 +00001723 Token::Panic => {
1724 let mut break_to_wait: bool = true;
1725 match panic_rdtube.recv::<u8>() {
1726 Ok(panic_code) => {
1727 let panic_code = PvPanicCode::from_u8(panic_code);
1728 info!("Guest reported panic [Code: {}]", panic_code);
1729 if panic_code == PvPanicCode::CrashLoaded {
1730 // VM is booting to crash kernel.
1731 break_to_wait = false;
1732 }
1733 }
1734 Err(e) => {
1735 warn!("failed to recv panic event: {} ", e);
1736 }
1737 }
1738 if break_to_wait {
1739 exit_state = ExitState::GuestPanic;
1740 break 'wait;
1741 }
1742 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001743 Token::Suspend => {
1744 info!("VM requested suspend");
1745 linux.suspend_evt.read().unwrap();
Anton Romanov5acc0f52022-01-28 00:18:11 +00001746 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001747 &vcpu_handles,
1748 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001749 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07001750 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001751 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001752 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001753 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001754 while let Some(siginfo) =
1755 sigchld_fd.read().context("failed to create signalfd")?
1756 {
Zach Reizner3ba00982019-01-23 19:04:43 -08001757 let pid = siginfo.ssi_pid;
1758 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1759 Some(label) => format!("{} (pid {})", label, pid),
1760 None => format!("pid {}", pid),
1761 };
David Tolnayf5032762018-12-03 10:46:45 -08001762 error!(
1763 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001764 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001765 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001766 }
Michael Hoylee392c462020-10-07 03:29:24 -07001767 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001768 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001769 Token::IrqFd { index } => {
1770 if let Err(e) = linux.irq_chip.service_irq_event(index) {
1771 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001772 }
1773 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001774 Token::VmControlServer => {
1775 if let Some(socket_server) = &control_server_socket {
1776 match socket_server.accept() {
1777 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07001778 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001779 .add(
1780 &socket,
1781 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001782 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08001783 },
1784 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001785 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001786 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001787 }
1788 Err(e) => error!("failed to accept socket: {}", e),
1789 }
1790 }
1791 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001792 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001793 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001794 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001795 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001796 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001797 Ok(request) => {
1798 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001799 let response = match request {
1800 VmRequest::VfioCommand { vfio_path, add } => {
1801 handle_vfio_command(
1802 &mut linux,
1803 &mut sys_allocator,
1804 &cfg,
1805 &mut add_tubes,
Haiwei Lie2dffbf2022-02-18 14:30:56 +08001806 &iommu_host_tube,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001807 &vfio_path,
1808 add,
1809 )
1810 }
1811 _ => request.execute(
1812 &mut run_mode_opt,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001813 balloon_host_tube.as_ref(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001814 &mut balloon_stats_id,
1815 disk_host_tubes,
1816 #[cfg(feature = "usb")]
1817 Some(&usb_control_tube),
1818 #[cfg(not(feature = "usb"))]
1819 None,
1820 &mut linux.bat_control,
1821 &vcpu_handles,
1822 ),
1823 };
1824
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001825 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001826 error!("failed to send VmResponse: {}", e);
1827 }
1828 if let Some(run_mode) = run_mode_opt {
1829 info!("control socket changed run mode to {}", run_mode);
1830 match run_mode {
1831 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07001832 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001833 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001834 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08001835 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07001836 for dev in &linux.resume_notify_devices {
1837 dev.lock().resume_imminent();
1838 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001839 }
Anton Romanov5acc0f52022-01-28 00:18:11 +00001840 vcpu::kick_all_vcpus(
Steven Richman11dc6712020-09-02 15:39:14 -07001841 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07001842 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001843 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07001844 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001845 }
1846 }
1847 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001848 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001849 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001850 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001851 vm_control_indices_to_remove.push(index);
1852 } else {
1853 error!("failed to recv VmRequest: {}", e);
1854 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001855 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001856 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001857 TaggedControlTube::VmMemory(tube) => {
1858 match tube.recv::<VmMemoryRequest>() {
1859 Ok(request) => {
1860 let response = request.execute(
1861 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07001862 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001863 Arc::clone(&map_request),
1864 &mut gralloc,
1865 );
1866 if let Err(e) = tube.send(&response) {
1867 error!("failed to send VmMemoryControlResponse: {}", e);
1868 }
1869 }
1870 Err(e) => {
1871 if let TubeError::Disconnected = e {
1872 vm_control_indices_to_remove.push(index);
1873 } else {
1874 error!("failed to recv VmMemoryControlRequest: {}", e);
1875 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001876 }
1877 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001878 }
1879 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08001880 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001881 let response = {
1882 let irq_chip = &mut linux.irq_chip;
1883 request.execute(
1884 |setup| match setup {
1885 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001886 if let Some(event_index) = irq_chip
1887 .register_irq_event(irq, ev, None)?
1888 {
1889 match wait_ctx.add(
1890 ev,
1891 Token::IrqFd {
1892 index: event_index
1893 },
1894 ) {
1895 Err(e) => {
1896 warn!("failed to add IrqFd to poll context: {}", e);
1897 Err(e)
1898 },
1899 Ok(_) => {
1900 Ok(())
1901 }
1902 }
1903 } else {
1904 Ok(())
1905 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001906 }
1907 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08001908 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001909 },
Zach Reiznerdc748482021-04-14 13:59:30 -07001910 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001911 )
1912 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001913 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08001914 error!("failed to send VmIrqResponse: {}", e);
1915 }
1916 }
1917 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001918 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08001919 vm_control_indices_to_remove.push(index);
1920 } else {
1921 error!("failed to recv VmIrqRequest: {}", e);
1922 }
1923 }
1924 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001925 TaggedControlTube::VmMsync(tube) => {
1926 match tube.recv::<VmMsyncRequest>() {
1927 Ok(request) => {
1928 let response = request.execute(&mut linux.vm);
1929 if let Err(e) = tube.send(&response) {
1930 error!("failed to send VmMsyncResponse: {}", e);
1931 }
1932 }
1933 Err(e) => {
1934 if let TubeError::Disconnected = e {
1935 vm_control_indices_to_remove.push(index);
1936 } else {
1937 error!("failed to recv VmMsyncRequest: {}", e);
1938 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08001939 }
1940 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001941 }
1942 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001943 Ok(request) => {
1944 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07001945 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001946 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001947 error!("failed to send VmResponse: {}", e);
1948 }
1949 }
1950 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001951 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001952 vm_control_indices_to_remove.push(index);
1953 } else {
1954 error!("failed to recv VmResponse: {}", e);
1955 }
1956 }
1957 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001958 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001959 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001960 if !add_tubes.is_empty() {
1961 for (idx, socket) in add_tubes.iter().enumerate() {
1962 wait_ctx
1963 .add(
1964 socket.as_ref(),
1965 Token::VmControl {
1966 index: idx + control_tubes.len(),
1967 },
1968 )
1969 .context(
1970 "failed to add hotplug vfio-pci descriptor ot wait context",
1971 )?;
1972 }
1973 control_tubes.append(&mut add_tubes);
1974 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001975 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001976 }
1977 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001978
Vikram Auradkarede68c72021-07-01 14:33:54 -07001979 // It's possible more data is readable and buffered while the socket is hung up,
1980 // so don't delete the tube from the poll context until we're sure all the
1981 // data is read.
1982 // The case below covers the condition where we have received a hang-up event and the tube
1983 // is not readable.
1984 // For a readable tube, once all data has been read, any further read on the hung-up
1985 // tube should fail. On such a failure we get a Disconnected error, and the index has been
1986 // added to vm_control_indices_to_remove by the time we reach here.
1987 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
1988 if let Token::VmControl { index } = event.token {
1989 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001990 }
1991 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001992
1993 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08001994 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07001995 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08001996 vm_control_indices_to_remove.dedup();
1997 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07001998 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
1999 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002000 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07002001 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08002002 // that has already been closed. Because the token associated with that spurious event
2003 // now belongs to a different socket, the control loop will start to interact with
2004 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2005 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002006 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07002007 wait_ctx
2008 .delete(socket)
2009 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08002010 }
2011
2012 // This line implicitly drops the socket at `index` when it gets returned by
2013 // `swap_remove`. After this line, the socket at `index` is not the one from
2014 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07002015 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002016 control_tubes.swap_remove(index);
2017 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002018 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002019 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07002020 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002021 }
2022 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002023 }
2024
Anton Romanov5acc0f52022-01-28 00:18:11 +00002025 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07002026 &vcpu_handles,
2027 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08002028 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07002029 );
Steven Richman11dc6712020-09-02 15:39:14 -07002030 for (handle, _) in vcpu_handles {
2031 if let Err(e) = handle.join() {
2032 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002033 }
2034 }
2035
Daniel Verkamp94c35272019-09-12 13:31:30 -07002036 // Explicitly drop the VM structure here to allow the devices to clean up before the
2037 // control sockets are closed when this function exits.
2038 mem::drop(linux);
2039
Zach Reizner19ad1f32019-12-12 18:58:50 -08002040 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002041 .set_canon_mode()
2042 .expect("failed to restore canonical mode for terminal");
2043
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08002044 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002045}