blob: ad8d313af2050780e6bdd87a84dee68f36b10ebb [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Anton Romanov5acc0f52022-01-28 00:18:11 +00006use std::collections::BTreeMap;
7use std::convert::TryInto;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::fs::{File, OpenOptions};
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00009use std::io::prelude::*;
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090010use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070011use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070012use std::mem;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000013#[cfg(feature = "gpu")]
14use std::os::unix::net::UnixStream;
15use std::os::unix::prelude::OpenOptionsExt;
16use std::path::Path;
Dylan Reidb0492662019-05-17 14:50:13 -070017use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090018use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070019
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000020use std::process;
Anton Romanov5acc0f52022-01-28 00:18:11 +000021#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::thread;
Zach Reizner39aa26b2017-12-12 18:03:23 -080023
Anton Romanov5acc0f52022-01-28 00:18:11 +000024use libc;
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
Tomasz Jeznach42644642020-05-20 23:27:59 -070026use acpi_tables::sdt::SDT;
27
Daniel Verkamp6b298582021-08-16 15:37:11 -070028use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090029use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Anton Romanov5acc0f52022-01-28 00:18:11 +000031use devices::serial_device::SerialHardware;
Zide Chenafdb9382021-06-17 12:04:43 -070032use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Anton Romanovd43ae3c2022-01-31 17:32:54 +000033#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000034use devices::virtio::{self, EventDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080035#[cfg(feature = "audio")]
36use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080037use devices::{
Anton Romanov5acc0f52022-01-28 00:18:11 +000038 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqEventIndex, KvmKernelIrqChip, PciAddress,
39 PciBridge, PciDevice, PcieRootPort, StubPciDevice, VfioContainer, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080040};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080041use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070042#[cfg(feature = "usb")]
43use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070044use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Anton Romanov5acc0f52022-01-28 00:18:11 +000045use hypervisor::{HypervisorCap, ProtectionType, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070046use minijail::{self, Minijail};
Anton Romanov5acc0f52022-01-28 00:18:11 +000047use resources::{Alloc, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080048use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070049use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080050use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090051use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080052
Keiichi Watanabec5262e92020-10-21 15:57:33 +090053#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
54use crate::gdb::{gdb_thread, GdbStub};
Anton Romanovd43ae3c2022-01-31 17:32:54 +000055use crate::{Config, Executable, SharedDir, SharedDirKind, VfioType};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070056use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090057 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070058};
Sonny Raoed517d12018-02-13 22:09:43 -080059
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080060#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070061use {
62 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070063 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070064 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
65};
Zach Reizner55a9e502018-10-03 10:22:32 -070066#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070067use {
Steven Richman11dc6712020-09-02 15:39:14 -070068 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
69 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070070 x86_64::X8664arch as Arch,
71};
Zach Reizner39aa26b2017-12-12 18:03:23 -080072
Anton Romanov5acc0f52022-01-28 00:18:11 +000073mod device_helpers;
74use device_helpers::*;
75mod jail_helpers;
76use jail_helpers::*;
77mod vcpu;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090078
David Tolnay2b089fc2019-03-04 15:33:22 -080079#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000080mod gpu;
Chirantan Ekbote44292f52021-06-25 18:31:41 +090081#[cfg(feature = "gpu")]
Dmitry Torokhove464a7a2022-01-26 13:29:36 -080082pub use gpu::GpuRenderServerParameters;
83#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000084use gpu::*;
Jorge E. Moreirad4562d02021-06-28 16:21:12 -070085
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080086// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -070087#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -080088fn create_virtio_devices(
89 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -070090 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -070091 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -070092 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080093 wayland_device_tube: Tube,
94 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +090095 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +000096 balloon_device_tube: Option<Tube>,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +080097 balloon_inflate_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +090098 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080099 disk_device_tubes: &mut Vec<Tube>,
100 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800101 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800102 fs_device_tubes: &mut Vec<Tube>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800103 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800104 vvu_proxy_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800105) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700106 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800107
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900108 #[cfg(feature = "gpu")]
109 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
110 devs.push(create_vhost_user_gpu_device(
111 cfg,
112 opt,
113 host_tube,
114 device_tube,
115 )?);
116 }
117
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700118 for opt in &cfg.vvu_proxy {
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800119 devs.push(create_vvu_proxy_device(
120 cfg,
121 opt,
122 vvu_proxy_device_tubes.remove(0),
123 )?);
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700124 }
125
David Tolnayfa701712019-02-13 16:42:54 -0800126 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800127 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900128
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900129 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900130 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800131 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900132
133 #[cfg(feature = "gpu")]
134 {
Jason Macnakcc7070b2019-11-06 14:48:12 -0800135 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700136 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900137 resource_bridges.push(gpu_socket);
138 wl_resource_bridge = Some(wl_socket);
139 }
140 }
141
142 devs.push(create_wayland_device(
143 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800144 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900145 wl_resource_bridge,
146 )?);
147 }
David Tolnayfa701712019-02-13 16:42:54 -0800148
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900149 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900150 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700151 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700152 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900153 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700154 } else {
155 None
156 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900157
158 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900159 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700160 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700161 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900162 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700163 } else {
164 None
165 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900166
Zach Reizner3a8100a2017-09-13 19:15:43 -0700167 #[cfg(feature = "gpu")]
168 {
Noah Golddc7f52b2020-02-01 13:01:58 -0800169 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Anton Romanov5acc0f52022-01-28 00:18:11 +0000170 let mut gpu_display_w = virtio::DEFAULT_DISPLAY_WIDTH;
171 let mut gpu_display_h = virtio::DEFAULT_DISPLAY_HEIGHT;
Jason Macnakd659a0d2021-03-15 15:33:01 -0700172 if !gpu_parameters.displays.is_empty() {
173 gpu_display_w = gpu_parameters.displays[0].width;
174 gpu_display_h = gpu_parameters.displays[0].height;
175 }
176
Zach Reizner65b98f12019-11-22 17:34:58 -0800177 let mut event_devices = Vec::new();
178 if cfg.display_window_mouse {
179 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700180 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000181 let (multi_touch_width, multi_touch_height) = cfg
182 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700183 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800184 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000185 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -0700186 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000187 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700188 // u32::MAX is the least likely to collide with the indices generated above for
189 // the multi_touch options, which begin at 0.
190 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800191 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000192 multi_touch_width,
193 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700194 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800195 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700196 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800197 devs.push(VirtioDeviceStub {
198 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700199 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800200 });
201 event_devices.push(EventDevice::touchscreen(event_device_socket));
202 }
203 if cfg.display_window_keyboard {
204 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700205 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -0700206 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700207 // u32::MAX is the least likely to collide with the indices generated above for
208 // the multi_touch options, which begin at 0.
209 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700210 virtio_dev_socket,
211 virtio::base_features(cfg.protected_vm),
212 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700213 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800214 devs.push(VirtioDeviceStub {
215 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700216 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800217 });
218 event_devices.push(EventDevice::keyboard(event_device_socket));
219 }
Chia-I Wu16fb6592021-11-10 11:45:32 -0800220
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700221 devs.push(create_gpu_device(
222 cfg,
223 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800224 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700225 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900226 // Use the unnamed socket for GPU display screens.
227 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700228 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -0800229 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -0800230 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800231 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700232 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -0700233 }
234 }
235
Richard Fung08289b12022-02-02 20:46:19 +0000236 for (_, param) in cfg
237 .serial_parameters
238 .iter()
239 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
240 {
241 let dev = create_console_device(cfg, param)?;
242 devs.push(dev);
243 }
244
245 for disk in &cfg.disks {
246 let disk_device_tube = disk_device_tubes.remove(0);
247 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
248 }
249
250 for blk in &cfg.vhost_user_blk {
251 devs.push(create_vhost_user_block_device(cfg, blk)?);
252 }
253
254 for console in &cfg.vhost_user_console {
255 devs.push(create_vhost_user_console_device(cfg, console)?);
256 }
257
258 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
259 let pmem_device_tube = pmem_device_tubes.remove(0);
260 devs.push(create_pmem_device(
261 cfg,
262 vm,
263 resources,
264 pmem_disk,
265 index,
266 pmem_device_tube,
267 )?);
268 }
269
270 devs.push(create_rng_device(cfg)?);
271
272 #[cfg(feature = "tpm")]
273 {
274 if cfg.software_tpm {
275 devs.push(create_tpm_device(cfg)?);
276 }
277 }
278
279 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
280 devs.push(create_single_touch_device(
281 cfg,
282 single_touch_spec,
283 idx as u32,
284 )?);
285 }
286
287 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
288 devs.push(create_multi_touch_device(
289 cfg,
290 multi_touch_spec,
291 idx as u32,
292 )?);
293 }
294
295 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
296 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
297 }
298
299 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
300 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
301 }
302
303 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
304 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
305 }
306
307 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
308 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
309 }
310
311 for dev_path in &cfg.virtio_input_evdevs {
312 devs.push(create_vinput_device(cfg, dev_path)?);
313 }
314
315 if let Some(balloon_device_tube) = balloon_device_tube {
316 devs.push(create_balloon_device(
317 cfg,
318 balloon_device_tube,
319 balloon_inflate_tube,
320 init_balloon_size,
321 )?);
322 }
323
324 // We checked above that if the IP is defined, then the netmask is, too.
325 for tap_fd in &cfg.tap_fd {
326 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
327 }
328
329 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
330 (cfg.host_ip, cfg.netmask, cfg.mac_address)
331 {
332 if !cfg.vhost_user_net.is_empty() {
333 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
334 }
335 devs.push(create_net_device_from_config(
336 cfg,
337 host_ip,
338 netmask,
339 mac_address,
340 )?);
341 }
342
343 for tap_name in &cfg.tap_name {
344 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
345 }
346
347 for net in &cfg.vhost_user_net {
348 devs.push(create_vhost_user_net_device(cfg, net)?);
349 }
350
351 for vsock in &cfg.vhost_user_vsock {
352 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
353 }
354
355 for opt in &cfg.vhost_user_wl {
356 devs.push(create_vhost_user_wl_device(cfg, opt)?);
357 }
358
Chih-Yang Hsiae31731c2022-01-05 17:30:28 +0800359 #[cfg(feature = "audio_cras")]
360 {
361 for cras_snd in &cfg.cras_snds {
362 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
363 }
364 }
365
Daniel Verkampffb59122021-03-18 14:06:15 -0700366 #[cfg(feature = "video-decoder")]
367 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900368 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700369 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900370 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700371 &mut devs,
372 video_dec_tube,
373 cfg,
374 devices::virtio::VideoDeviceType::Decoder,
375 )?;
376 }
377 }
378
379 #[cfg(feature = "video-encoder")]
380 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900381 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700382 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900383 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700384 &mut devs,
385 video_enc_tube,
386 cfg,
387 devices::virtio::VideoDeviceType::Encoder,
388 )?;
389 }
390 }
391
Zach Reizneraa575662018-08-15 10:46:32 -0700392 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +0900393 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700394 }
395
Woody Chow5890b702021-02-12 14:57:02 +0900396 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700397 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +0900398 }
399
Woody Chow1b16db12021-04-02 16:59:59 +0900400 #[cfg(feature = "audio")]
401 for vhost_user_snd in &cfg.vhost_user_snd {
402 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
403 }
404
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900405 for shared_dir in &cfg.shared_dirs {
406 let SharedDir {
407 src,
408 tag,
409 kind,
410 uid_map,
411 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900412 fs_cfg,
413 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900414 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -0800415
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900416 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900417 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800418 let device_tube = fs_device_tubes.remove(0);
419 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900420 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900421 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900422 };
423 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -0800424 }
425
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900426 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
427 devs.push(create_vhost_user_mac80211_hwsim_device(
428 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700429 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900430 )?);
431 }
432
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700433 #[cfg(feature = "audio")]
434 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700435 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700436 }
437
David Tolnay2b089fc2019-03-04 15:33:22 -0800438 Ok(devs)
439}
440
441fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -0600442 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -0700443 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -0700444 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -0700445 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -0800446 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800447 control_tubes: &mut Vec<TaggedControlTube>,
448 wayland_device_tube: Tube,
449 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900450 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000451 balloon_device_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900452 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800453 disk_device_tubes: &mut Vec<Tube>,
454 pmem_device_tubes: &mut Vec<Tube>,
455 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -0700456 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800457 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800458 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800459 vvu_proxy_device_tubes: &mut Vec<Tube>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000460) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800461 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
462 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -0700463 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -0800464 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
465 BTreeMap::new();
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800466 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -0800467
Tomasz Nowicki71aca792021-06-09 18:53:49 +0000468 for vfio_dev in cfg
469 .vfio
470 .iter()
471 .filter(|dev| dev.get_type() == VfioType::Pci)
472 {
473 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -0700474 let (vfio_pci_device, jail) = create_vfio_device(
475 cfg,
476 vm,
477 resources,
478 control_tubes,
479 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +0800480 None,
Zide Chendfc4b882021-03-10 16:35:37 -0800481 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800482 Some(&mut coiommu_attached_endpoints),
483 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -0700484 )?;
Zide Chendfc4b882021-03-10 16:35:37 -0800485
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000486 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -0700487 }
Zide Chendfc4b882021-03-10 16:35:37 -0800488
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000489 for vfio_dev in cfg
490 .vfio
491 .iter()
492 .filter(|dev| dev.get_type() == VfioType::Platform)
493 {
494 let vfio_path = &vfio_dev.vfio_path;
495 let (vfio_plat_dev, jail) = create_vfio_platform_device(
496 cfg,
497 vm,
498 resources,
499 control_tubes,
500 vfio_path.as_path(),
501 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800502 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000503 )?;
504
505 devices.push((Box::new(vfio_plat_dev), jail));
506 }
507
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +0800508 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
509 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
510 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
511 if res == 0 {
512 let limit = unsafe { buf.assume_init() };
513 let rlim_new = limit
514 .rlim_cur
515 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
516 let rlim_max = max(limit.rlim_max, rlim_new);
517 if limit.rlim_cur < rlim_new {
518 let limit_arg = libc::rlimit {
519 rlim_cur: rlim_new as libc::rlim_t,
520 rlim_max: rlim_max as libc::rlim_t,
521 };
522 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
523 if res != 0 {
524 bail!("Set rlimit failed");
525 }
526 }
527 } else {
528 bail!("Get rlimit failed");
529 }
530 }
531
Zide Chendfc4b882021-03-10 16:35:37 -0800532 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -0800533 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -0800534
Daniel Verkamp6b298582021-08-16 15:37:11 -0700535 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -0800536 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -0700537 let mut dev =
538 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700539 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -0700540 // early reservation for viommu.
541 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700542 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -0700543 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000544 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -0800545 }
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800546
547 if !coiommu_attached_endpoints.is_empty() {
548 let vfio_container =
549 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
550 .context("failed to get vfio container")?;
551 let (coiommu_host_tube, coiommu_device_tube) =
552 Tube::pair().context("failed to create coiommu tube")?;
553 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
554 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800555 let (coiommu_tube, balloon_tube) =
556 Tube::pair().context("failed to create coiommu tube")?;
557 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800558 let dev = CoIommuDev::new(
559 vm.get_memory().clone(),
560 vfio_container,
561 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800562 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800563 coiommu_attached_endpoints,
564 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +0800565 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800566 )
567 .context("failed to create coiommu device")?;
568
569 devices.push((Box::new(dev), simple_jail(cfg, "coiommu")?));
570 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800571 }
572
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800573 let stubs = create_virtio_devices(
574 cfg,
575 vm,
576 resources,
577 exit_evt,
578 wayland_device_tube,
579 gpu_device_tube,
580 vhost_user_gpu_tubes,
581 balloon_device_tube,
582 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +0900583 init_balloon_size,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800584 disk_device_tubes,
585 pmem_device_tubes,
586 map_request,
587 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800588 #[cfg(feature = "gpu")]
589 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800590 vvu_proxy_device_tubes,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800591 )?;
592
593 for stub in stubs {
594 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
595 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
596 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
597 .context("failed to create virtio pci dev")?;
598 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
599 devices.push((dev, stub.jail));
600 }
601
602 #[cfg(feature = "audio")]
603 for ac97_param in &cfg.ac97_parameters {
604 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
605 .context("failed to create ac97 device")?;
606 let jail = simple_jail(cfg, dev.minijail_policy())?;
607 devices.push((Box::new(dev), jail));
608 }
609
610 #[cfg(feature = "usb")]
611 {
612 // Create xhci controller.
613 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
614 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
615 }
616
Mattias Nisslerde2c6402021-10-21 12:05:29 +0000617 for params in &cfg.stub_pci_devices {
618 // Stub devices don't need jailing since they don't do anything.
619 devices.push((Box::new(StubPciDevice::new(params)), None));
620 }
621
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000622 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -0800623}
624
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000625fn create_file_backed_mappings(
626 cfg: &Config,
627 vm: &mut impl Vm,
628 resources: &mut SystemAllocator,
629) -> Result<()> {
630 for mapping in &cfg.file_backed_mappings {
631 let file = OpenOptions::new()
632 .read(true)
633 .write(mapping.writable)
634 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
635 .open(&mapping.path)
636 .context("failed to open file for file-backed mapping")?;
637 let prot = if mapping.writable {
638 Protection::read_write()
639 } else {
640 Protection::read()
641 };
642 let size = mapping
643 .size
644 .try_into()
645 .context("Invalid size for file-backed mapping")?;
646 let memory_mapping = MemoryMappingBuilder::new(size)
647 .from_file(&file)
648 .offset(mapping.offset)
649 .protection(prot)
650 .build()
651 .context("failed to map backing file for file-backed mapping")?;
652
653 resources
654 .mmio_allocator_any()
655 .allocate_at(
656 mapping.address,
657 mapping.size,
658 Alloc::FileBacked(mapping.address),
659 "file-backed mapping".to_owned(),
660 )
661 .context("failed to allocate guest address for file-backed mapping")?;
662
663 vm.add_memory_region(
664 GuestAddress(mapping.address),
665 Box::new(memory_mapping),
666 !mapping.writable,
667 /* log_dirty_pages = */ false,
668 )
669 .context("failed to configure file-backed mapping")?;
670 }
671
672 Ok(())
673}
674
Zach Reiznera90649a2021-03-31 12:56:08 -0700675fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800676 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100677 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900678 open_file(
679 initrd_path,
680 true, /*read_only*/
681 false, /*O_DIRECT*/
682 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700683 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100684 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800685 } else {
686 None
687 };
688
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700689 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100690 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900691 open_file(
692 kernel_path,
693 true, /*read_only*/
694 false, /*O_DIRECT*/
695 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700696 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100697 ),
698 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900699 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700700 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100701 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700702 _ => panic!("Did not receive a bios or kernel, should be impossible."),
703 };
704
Will Deaconc48e7832021-07-30 19:03:06 +0100705 let swiotlb = if let Some(size) = cfg.swiotlb {
706 Some(
707 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700708 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100709 )
710 } else {
711 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +0000712 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
713 Some(64 * 1024 * 1024)
714 }
Will Deaconc48e7832021-07-30 19:03:06 +0100715 ProtectionType::Unprotected => None,
716 }
717 };
718
Zach Reiznera90649a2021-03-31 12:56:08 -0700719 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -0800720 memory_size: cfg
721 .memory
722 .unwrap_or(256)
723 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700724 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100725 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -0700726 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -0700727 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -0700728 cpu_clusters: cfg.cpu_clusters.clone(),
729 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +0900730 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +0900731 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700732 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800733 android_fstab: cfg
734 .android_fstab
735 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700736 .map(|x| {
737 File::open(x)
738 .with_context(|| format!("failed to open android fstab file {}", x.display()))
739 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800740 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +0900741 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800742 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -0700743 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700744 acpi_sdts: cfg
745 .acpi_tables
746 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700747 .map(|path| {
748 SDT::from_file(path)
749 .with_context(|| format!("failed to open ACPI file {}", path.display()))
750 })
Tomasz Jeznach42644642020-05-20 23:27:59 -0700751 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +0900752 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +0900753 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100754 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900755 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -0700756 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700757 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -0700758 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +0800759 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -0700760 })
761}
762
/// Why a VM run came to an end, as reported by `run_config`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExitState {
    /// The run ended with a reset (presumably a guest-requested reboot; confirm with caller).
    Reset,
    /// The run ended with a stop.
    Stop,
    /// The run ended because a crash was reported.
    Crash,
}
769
770pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -0700771 let components = setup_vm_components(&cfg)?;
772
773 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700774 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
775 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700776 let mut mem_policy = MemoryPolicy::empty();
777 if components.hugepages {
778 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
779 }
Quentin Perret26203802021-12-02 09:48:43 +0000780 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700781 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +0000782 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Andrew Walbrane79aba12022-01-27 14:12:35 +0000783 // Check that the VM was actually created in protected mode as expected.
784 if cfg.protected_vm != ProtectionType::Unprotected && !vm.check_capability(VmCap::Protected) {
785 bail!("Failed to create protected VM");
786 }
Daniel Verkamp6b298582021-08-16 15:37:11 -0700787 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700788
789 enum KvmIrqChip {
790 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
791 Split(KvmSplitIrqChip),
792 Kernel(KvmKernelIrqChip),
793 }
794
795 impl KvmIrqChip {
796 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
797 match self {
798 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
799 KvmIrqChip::Split(i) => i,
800 KvmIrqChip::Kernel(i) => i,
801 }
802 }
803 }
804
805 let ioapic_host_tube;
806 let mut irq_chip = if cfg.split_irqchip {
807 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
808 unimplemented!("KVM split irqchip mode only supported on x86 processors");
809 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
810 {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700811 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700812 ioapic_host_tube = Some(host_tube);
813 KvmIrqChip::Split(
814 KvmSplitIrqChip::new(
815 vm_clone,
816 components.vcpu_count,
817 ioapic_device_tube,
818 Some(120),
819 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700820 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700821 )
822 }
823 } else {
824 ioapic_host_tube = None;
825 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700826 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
827 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700828 )
829 };
830
831 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
832}
833
834fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -0700835 cfg: Config,
836 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -0700837 mut vm: V,
838 irq_chip: &mut dyn IrqChipArch,
839 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800840) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -0700841where
842 Vcpu: VcpuArch + 'static,
843 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -0700844{
845 if cfg.sandbox {
846 // Printing something to the syslog before entering minijail so that libc's syslogger has a
847 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
848 // access to those files will not be possible.
849 info!("crosvm entering multiprocess mode");
850 }
851
Daniel Verkampf1439d42021-05-21 13:55:10 -0700852 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -0700853 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700854 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -0700855
Zach Reiznera90649a2021-03-31 12:56:08 -0700856 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
857 // before any jailed devices have been spawned, so that we can catch any of them that fail very
858 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700859 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -0700860
Zach Reiznera60744b2019-02-13 17:33:32 -0800861 let control_server_socket = match &cfg.socket_path {
862 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700863 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -0800864 )),
865 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -0700866 };
Zach Reiznera60744b2019-02-13 17:33:32 -0800867
Zach Reiznera90649a2021-03-31 12:56:08 -0700868 let mut control_tubes = Vec::new();
869
870 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
871 if let Some(port) = cfg.gdb {
872 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700873 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -0700874 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
875 components.gdb = Some((port, gdb_control_tube));
876 }
877
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900878 for wl_cfg in &cfg.vhost_user_wl {
879 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
880 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700881 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900882 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
883 }
884
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900885 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
886 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700887 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900888 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -0700889 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900890 device_tube,
891 ));
892 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
893 }
894
Daniel Verkamp6b298582021-08-16 15:37:11 -0700895 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800896 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Andrew Walbran3cd93602022-01-25 13:59:23 +0000897
898 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
David Stevens8be9ef02022-01-13 22:50:24 +0900899 if let Some(ref path) = cfg.balloon_control {
900 (
901 None,
902 Some(Tube::new(
903 UnixSeqpacket::connect(path).context("failed to create balloon control")?,
904 )),
905 )
906 } else {
907 // Balloon gets a special socket so balloon requests can be forwarded
908 // from the main process.
909 let (host, device) = Tube::pair().context("failed to create tube")?;
910 // Set recv timeout to avoid deadlock on sending BalloonControlCommand
911 // before the guest is ready.
912 host.set_recv_timeout(Some(Duration::from_millis(100)))
913 .context("failed to set timeout")?;
914 (Some(host), Some(device))
915 }
Andrew Walbran3cd93602022-01-25 13:59:23 +0000916 } else {
917 (None, None)
918 };
Dylan Reid059a1882018-07-23 17:58:09 -0700919
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800920 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800921 let mut disk_device_tubes = Vec::new();
922 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800923 let disk_count = cfg.disks.len();
924 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700925 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800926 disk_host_tubes.push(disk_host_tub);
927 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800928 }
929
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800930 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -0800931 let pmem_count = cfg.pmem_devices.len();
932 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700933 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800934 pmem_device_tubes.push(pmem_device_tube);
935 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -0800936 }
937
Daniel Verkamp6b298582021-08-16 15:37:11 -0700938 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800939 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -0700940
Zach Reiznerdc748482021-04-14 13:59:30 -0700941 if let Some(ioapic_host_tube) = ioapic_host_tube {
942 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
943 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +0800944
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +0800945 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -0700946 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +0900947 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -0700948 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +0900949 Some(mut jail) => {
950 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
951 #[cfg(feature = "power-monitor-powerd")]
952 {
Fergus Dall51200512021-08-19 12:54:26 +1000953 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +0900954
955 // Create a tmpfs in the device's root directory so that we can bind mount files.
956 jail.mount_with_data(
957 Path::new("none"),
958 Path::new("/"),
959 "tmpfs",
960 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
961 "size=67108864",
962 )?;
963
964 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
965 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
966 }
967 Some(jail)
968 }
969 None => None,
970 };
971 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +0800972 } else {
973 (&cfg.battery_type, None)
974 };
975
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800976 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
977
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900978 let fs_count = cfg
979 .shared_dirs
980 .iter()
981 .filter(|sd| sd.kind == SharedDirKind::FS)
982 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800983 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900984 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700985 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800986 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
987 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900988 }
989
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -0800990 let mut vvu_proxy_device_tubes = Vec::new();
991 for _ in 0..cfg.vvu_proxy.len() {
992 let (vvu_proxy_host_tube, vvu_proxy_device_tube) =
993 Tube::pair().context("failed to create VVU proxy tube")?;
994 control_tubes.push(TaggedControlTube::VmMemory(vvu_proxy_host_tube));
995 vvu_proxy_device_tubes.push(vvu_proxy_device_tube);
996 }
997
Daniel Verkamp6b298582021-08-16 15:37:11 -0700998 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800999 let reset_evt = Event::new().context("failed to create event")?;
Andrew Walbran1a19c672022-01-24 17:24:10 +00001000 let crash_evt = Event::new().context("failed to create event")?;
Daniel Verkamp6f4f8222022-01-05 14:09:09 -08001001 let mut sys_allocator = Arch::create_system_allocator(&vm);
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001002
1003 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
1004 let ramoops_region = match &components.pstore {
1005 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -07001006 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001007 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001008 ),
1009 None => None,
1010 };
1011
Mattias Nisslerbbd91d02021-12-07 08:57:45 +00001012 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
1013
Daniel Verkamp891ea3e2022-01-04 12:35:55 -08001014 let phys_max_addr = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001015
1016 #[cfg(feature = "gpu")]
1017 // Hold on to the render server jail so it keeps running until we exit run_vm()
Dmitry Torokhove464a7a2022-01-26 13:29:36 -08001018 let (_render_server_jail, render_server_fd) =
1019 if let Some(parameters) = &cfg.gpu_render_server_parameters {
1020 let (jail, fd) = start_gpu_render_server(&cfg, parameters)?;
1021 (Some(ScopedMinijail(jail)), Some(fd))
1022 } else {
1023 (None, None)
1024 };
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001025
David Stevens06d157a2022-01-13 23:44:48 +09001026 let init_balloon_size = components
1027 .memory_size
1028 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1029 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1030 }))
1031 .context("failed to calculate init balloon size")?;
1032
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001033 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07001034 &cfg,
1035 &mut vm,
1036 &mut sys_allocator,
1037 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08001038 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07001039 &mut control_tubes,
1040 wayland_device_tube,
1041 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001042 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001043 balloon_device_tube,
David Stevens06d157a2022-01-13 23:44:48 +09001044 init_balloon_size,
Zach Reiznerdc748482021-04-14 13:59:30 -07001045 &mut disk_device_tubes,
1046 &mut pmem_device_tubes,
1047 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001048 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07001049 usb_provider,
1050 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001051 #[cfg(feature = "gpu")]
1052 render_server_fd,
Abhishek Bhardwaj90fd1642021-11-24 18:26:37 -08001053 &mut vvu_proxy_device_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001054 )?;
1055
Peter Fangc2bba082021-04-19 18:40:24 -07001056 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001057 for device in devices
1058 .iter_mut()
1059 .filter_map(|(dev, _)| dev.as_pci_device_mut())
1060 {
Peter Fangc2bba082021-04-19 18:40:24 -07001061 let sdts = device
1062 .generate_acpi(components.acpi_sdts)
1063 .or_else(|| {
1064 error!("ACPI table generation error");
1065 None
1066 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001067 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07001068 components.acpi_sdts = sdts;
1069 }
1070
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001071 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
1072 let mut kvm_vcpu_ids = Vec::new();
1073
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08001074 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07001075 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06001076 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07001077 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001078 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07001079 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06001080 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001081 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001082 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07001083 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001084 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001085 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07001086 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001087 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06001088 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001089 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08001090
Daniel Verkamp1286b482021-11-30 15:14:16 -08001091 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1092 {
1093 // Create Pcie Root Port
Daniel Verkamp1286b482021-11-30 15:14:16 -08001094 let sec_bus = (1..255)
1095 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
1096 .context("failed to find empty bus for Pci hotplug")?;
Xiong Zhangd1f6ca12022-01-21 13:48:02 +08001097 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new(sec_bus)));
1098 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1099 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1100 let pci_bridge = Box::new(PciBridge::new(pcie_root_port.clone(), msi_device_tube));
Daniel Verkamp1286b482021-11-30 15:14:16 -08001101 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
1102 .context("Failed to configure pci bridge device")?;
1103 linux.hotplug_bus.push(pcie_root_port);
1104 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001105
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001106 #[cfg(feature = "direct")]
1107 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001108 let direct_io = Arc::new(
1109 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
1110 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001111 for range in pmio.ranges.iter() {
1112 linux
1113 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001114 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001115 .unwrap();
1116 }
1117 };
1118
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001119 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001120 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08001121 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001122 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08001123 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001124 );
Xiong Zhang46471a02021-11-12 00:34:42 +08001125
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001126 for range in mmio.ranges.iter() {
1127 linux
1128 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001129 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001130 .unwrap();
1131 }
1132 };
1133
1134 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001135 let mut irqs = Vec::new();
1136
1137 #[cfg(feature = "direct")]
1138 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07001139 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001140 warn!("irq {} already reserved.", irq);
1141 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001142 let trigger = Event::new().context("failed to create event")?;
1143 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001144 linux
1145 .irq_chip
1146 .register_irq_event(*irq, &trigger, Some(&resample))
1147 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001148 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
1149 .context("failed to enable interrupt forwarding")?;
1150 direct_irq
1151 .irq_enable(*irq)
1152 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001153 irqs.push(direct_irq);
1154 }
1155
1156 #[cfg(feature = "direct")]
1157 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07001158 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001159 warn!("irq {} already reserved.", irq);
1160 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001161 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001162 linux
1163 .irq_chip
1164 .register_irq_event(*irq, &trigger, None)
1165 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001166 let direct_irq = devices::DirectIrq::new(trigger, None)
1167 .context("failed to enable interrupt forwarding")?;
1168 direct_irq
1169 .irq_enable(*irq)
1170 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001171 irqs.push(direct_irq);
1172 }
1173
Daniel Verkamp6b298582021-08-16 15:37:11 -07001174 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001175 run_control(
1176 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07001177 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001178 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08001179 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001180 control_tubes,
1181 balloon_host_tube,
1182 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001183 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001184 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001185 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001186 reset_evt,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001187 crash_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001188 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001189 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08001190 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001191 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001192 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001193}
1194
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001195fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
1196 linux: &RunnableLinuxVm<V, Vcpu>,
1197 host_addr: PciAddress,
1198) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
1199 for hp_bus in linux.hotplug_bus.iter() {
1200 if let Some(number) = hp_bus.lock().is_match(host_addr) {
1201 return Ok((hp_bus.clone(), number));
1202 }
1203 }
1204 Err(anyhow!("Failed to find a suitable hotplug bus"))
1205}
1206
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001207fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1208 linux: &mut RunnableLinuxVm<V, Vcpu>,
1209 sys_allocator: &mut SystemAllocator,
1210 cfg: &Config,
1211 control_tubes: &mut Vec<TaggedControlTube>,
1212 vfio_path: &Path,
1213) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001214 let host_os_str = vfio_path
1215 .file_name()
1216 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1217 let host_str = host_os_str
1218 .to_str()
1219 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1220 let host_addr = PciAddress::from_string(host_str);
1221
1222 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
1223
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001224 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
1225 let (vfio_pci_device, jail) = create_vfio_device(
1226 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001227 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001228 sys_allocator,
1229 control_tubes,
1230 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001231 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001232 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001233 None,
1234 IommuDevType::NoIommu,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001235 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001236
1237 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001238 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001239
Daniel Verkamp6b298582021-08-16 15:37:11 -07001240 let host_os_str = vfio_path
1241 .file_name()
1242 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1243 let host_str = host_os_str
1244 .to_str()
1245 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001246 let host_addr = PciAddress::from_string(host_str);
1247 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001248 let mut hp_bus = hp_bus.lock();
1249 hp_bus.add_hotplug_device(host_key, pci_address);
1250 hp_bus.hot_plug(pci_address);
1251 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001252}
1253
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001254fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1255 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08001256 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001257 vfio_path: &Path,
1258) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001259 let host_os_str = vfio_path
1260 .file_name()
1261 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1262 let host_str = host_os_str
1263 .to_str()
1264 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001265 let host_addr = PciAddress::from_string(host_str);
1266 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001267 for hp_bus in linux.hotplug_bus.iter() {
1268 let mut hp_bus_lock = hp_bus.lock();
1269 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
1270 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08001271 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001272 return Ok(());
1273 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001274 }
1275
Daniel Verkamp6b298582021-08-16 15:37:11 -07001276 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001277}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001278
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001279fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
1280 linux: &mut RunnableLinuxVm<V, Vcpu>,
1281 sys_allocator: &mut SystemAllocator,
1282 cfg: &Config,
1283 add_tubes: &mut Vec<TaggedControlTube>,
1284 vfio_path: &Path,
1285 add: bool,
1286) -> VmResponse {
1287 let ret = if add {
1288 add_vfio_device(linux, sys_allocator, cfg, add_tubes, vfio_path)
1289 } else {
1290 remove_vfio_device(linux, sys_allocator, vfio_path)
1291 };
1292
1293 match ret {
1294 Ok(()) => VmResponse::Ok,
1295 Err(e) => {
1296 error!("hanlde_vfio_command failure: {}", e);
1297 add_tubes.clear();
1298 VmResponse::Err(base::Error::new(libc::EINVAL))
1299 }
1300 }
1301}
1302
Zach Reiznerdc748482021-04-14 13:59:30 -07001303fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
1304 mut linux: RunnableLinuxVm<V, Vcpu>,
1305 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001306 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08001307 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001308 mut control_tubes: Vec<TaggedControlTube>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001309 balloon_host_tube: Option<Tube>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001310 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07001311 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001312 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001313 reset_evt: Event,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001314 crash_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07001315 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001316 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08001317 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001318 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001319) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001320 #[derive(PollToken)]
1321 enum Token {
1322 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001323 Reset,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001324 Crash,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001325 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001326 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001327 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08001328 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001329 VmControl { index: usize },
1330 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001331
Zach Reizner19ad1f32019-12-12 18:58:50 -08001332 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001333 .set_raw_mode()
1334 .expect("failed to set terminal raw mode");
1335
Michael Hoylee392c462020-10-07 03:29:24 -07001336 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07001337 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001338 (&reset_evt, Token::Reset),
Andrew Walbran1a19c672022-01-24 17:24:10 +00001339 (&crash_evt, Token::Crash),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001340 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07001341 (&sigchld_fd, Token::ChildSignal),
1342 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07001343 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07001344
Zach Reiznera60744b2019-02-13 17:33:32 -08001345 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07001346 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001347 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001348 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001349 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001350 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07001351 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07001352 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001353 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001354 }
1355
Steven Richmanf32d0b42020-06-20 21:45:32 -07001356 let events = linux
1357 .irq_chip
1358 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001359 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001360
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001361 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07001362 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001363 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001364 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001365 }
1366
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001367 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07001368 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001369 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07001370 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001371
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001372 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1373 // Create a channel for GDB thread.
1374 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
1375 let (s, r) = mpsc::channel();
1376 (Some(s), Some(r))
1377 } else {
1378 (None, None)
1379 };
1380
Steven Richmanf32d0b42020-06-20 21:45:32 -07001381 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
1382 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07001383 let use_hypervisor_signals = !linux
1384 .vm
1385 .get_hypervisor()
Andrew Walbran985491a2022-01-27 13:47:40 +00001386 .check_capability(HypervisorCap::ImmediateExit);
Anton Romanov5acc0f52022-01-28 00:18:11 +00001387 vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001388
Zach Reizner304e7312020-09-29 16:00:24 -07001389 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00001390 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001391 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
1392 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07001393 // Enable core scheduling before creating vCPUs so that the cookie will be
1394 // shared by all vCPU threads.
1395 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
1396 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001397 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07001398 if let Err(e) = enable_core_scheduling() {
1399 error!("Failed to enable core scheduling: {}", e);
1400 }
1401 }
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001402 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
1403 None => None,
1404 Some(cgroup_path) => {
1405 // Move main process to cgroup_path
1406 let mut f = File::create(&cgroup_path.join("tasks"))?;
1407 f.write_all(process::id().to_string().as_bytes())?;
1408 Some(f)
1409 }
1410 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07001411 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07001412 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07001413 let vcpu_affinity = match linux.vcpu_affinity.clone() {
1414 Some(VcpuAffinity::Global(v)) => v,
1415 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
1416 None => Default::default(),
1417 };
Anton Romanov5acc0f52022-01-28 00:18:11 +00001418 let handle = vcpu::run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001419 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001420 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07001421 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001422 linux.vm.try_clone().context("failed to clone vm")?,
1423 linux
1424 .irq_chip
1425 .try_box_clone()
1426 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001427 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001428 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07001429 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001430 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001431 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07001432 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001433 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07001434 (*linux.io_bus).clone(),
1435 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07001436 exit_evt.try_clone().context("failed to clone event")?,
Andrew Walbranb28ae8e2022-01-17 14:33:10 +00001437 reset_evt.try_clone().context("failed to clone event")?,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001438 crash_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001439 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07001440 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001441 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001442 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1443 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001444 cfg.per_vm_core_scheduling,
1445 cfg.host_cpu_topology,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001446 match vcpu_cgroup_tasks_file {
1447 None => None,
1448 Some(ref f) => Some(
1449 f.try_clone()
1450 .context("failed to clone vcpu cgroup tasks file")?,
1451 ),
1452 },
Zach Reizner55a9e502018-10-03 10:22:32 -07001453 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07001454 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07001455 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001456
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001457 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1458 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001459 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001460 let to_vcpu_channels = vcpu_handles
1461 .iter()
1462 .map(|(_handle, channel)| channel.clone())
1463 .collect();
1464 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001465 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001466 to_vcpu_channels,
1467 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
1468 );
1469 thread::Builder::new()
1470 .name("gdb".to_owned())
1471 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001472 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001473 };
1474
Dylan Reid059a1882018-07-23 17:58:09 -07001475 vcpu_thread_barrier.wait();
1476
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001477 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09001478 let mut balloon_stats_id: u64 = 0;
1479
Michael Hoylee392c462020-10-07 03:29:24 -07001480 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001481 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07001482 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001483 Ok(v) => v,
1484 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001485 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001486 break;
1487 }
1488 }
1489 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001490
Steven Richmanf32d0b42020-06-20 21:45:32 -07001491 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
1492 warn!("can't deliver delayed irqs: {}", e);
1493 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001494
Zach Reiznera60744b2019-02-13 17:33:32 -08001495 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07001496 for event in events.iter().filter(|e| e.is_readable) {
1497 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001498 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001499 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07001500 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001501 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001502 Token::Reset => {
1503 info!("vcpu requested reset");
1504 exit_state = ExitState::Reset;
1505 break 'wait;
1506 }
Andrew Walbran1a19c672022-01-24 17:24:10 +00001507 Token::Crash => {
1508 info!("vcpu crashed");
1509 exit_state = ExitState::Crash;
1510 break 'wait;
1511 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001512 Token::Suspend => {
1513 info!("VM requested suspend");
1514 linux.suspend_evt.read().unwrap();
Anton Romanov5acc0f52022-01-28 00:18:11 +00001515 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001516 &vcpu_handles,
1517 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001518 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07001519 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001520 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001521 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001522 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001523 while let Some(siginfo) =
1524 sigchld_fd.read().context("failed to create signalfd")?
1525 {
Zach Reizner3ba00982019-01-23 19:04:43 -08001526 let pid = siginfo.ssi_pid;
1527 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1528 Some(label) => format!("{} (pid {})", label, pid),
1529 None => format!("pid {}", pid),
1530 };
David Tolnayf5032762018-12-03 10:46:45 -08001531 error!(
1532 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001533 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001534 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001535 }
Michael Hoylee392c462020-10-07 03:29:24 -07001536 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001537 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001538 Token::IrqFd { index } => {
1539 if let Err(e) = linux.irq_chip.service_irq_event(index) {
1540 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001541 }
1542 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001543 Token::VmControlServer => {
1544 if let Some(socket_server) = &control_server_socket {
1545 match socket_server.accept() {
1546 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07001547 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001548 .add(
1549 &socket,
1550 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001551 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08001552 },
1553 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001554 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001555 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001556 }
1557 Err(e) => error!("failed to accept socket: {}", e),
1558 }
1559 }
1560 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001561 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001562 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001563 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001564 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001565 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001566 Ok(request) => {
1567 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001568 let response = match request {
1569 VmRequest::VfioCommand { vfio_path, add } => {
1570 handle_vfio_command(
1571 &mut linux,
1572 &mut sys_allocator,
1573 &cfg,
1574 &mut add_tubes,
1575 &vfio_path,
1576 add,
1577 )
1578 }
1579 _ => request.execute(
1580 &mut run_mode_opt,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001581 balloon_host_tube.as_ref(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001582 &mut balloon_stats_id,
1583 disk_host_tubes,
1584 #[cfg(feature = "usb")]
1585 Some(&usb_control_tube),
1586 #[cfg(not(feature = "usb"))]
1587 None,
1588 &mut linux.bat_control,
1589 &vcpu_handles,
1590 ),
1591 };
1592
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001593 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001594 error!("failed to send VmResponse: {}", e);
1595 }
1596 if let Some(run_mode) = run_mode_opt {
1597 info!("control socket changed run mode to {}", run_mode);
1598 match run_mode {
1599 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07001600 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001601 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001602 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08001603 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07001604 for dev in &linux.resume_notify_devices {
1605 dev.lock().resume_imminent();
1606 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001607 }
Anton Romanov5acc0f52022-01-28 00:18:11 +00001608 vcpu::kick_all_vcpus(
Steven Richman11dc6712020-09-02 15:39:14 -07001609 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07001610 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001611 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07001612 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001613 }
1614 }
1615 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001616 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001617 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001618 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001619 vm_control_indices_to_remove.push(index);
1620 } else {
1621 error!("failed to recv VmRequest: {}", e);
1622 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001623 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001624 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001625 TaggedControlTube::VmMemory(tube) => {
1626 match tube.recv::<VmMemoryRequest>() {
1627 Ok(request) => {
1628 let response = request.execute(
1629 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07001630 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001631 Arc::clone(&map_request),
1632 &mut gralloc,
1633 );
1634 if let Err(e) = tube.send(&response) {
1635 error!("failed to send VmMemoryControlResponse: {}", e);
1636 }
1637 }
1638 Err(e) => {
1639 if let TubeError::Disconnected = e {
1640 vm_control_indices_to_remove.push(index);
1641 } else {
1642 error!("failed to recv VmMemoryControlRequest: {}", e);
1643 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001644 }
1645 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001646 }
1647 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08001648 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001649 let response = {
1650 let irq_chip = &mut linux.irq_chip;
1651 request.execute(
1652 |setup| match setup {
1653 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001654 if let Some(event_index) = irq_chip
1655 .register_irq_event(irq, ev, None)?
1656 {
1657 match wait_ctx.add(
1658 ev,
1659 Token::IrqFd {
1660 index: event_index
1661 },
1662 ) {
1663 Err(e) => {
1664 warn!("failed to add IrqFd to poll context: {}", e);
1665 Err(e)
1666 },
1667 Ok(_) => {
1668 Ok(())
1669 }
1670 }
1671 } else {
1672 Ok(())
1673 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001674 }
1675 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08001676 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001677 },
Zach Reiznerdc748482021-04-14 13:59:30 -07001678 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001679 )
1680 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001681 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08001682 error!("failed to send VmIrqResponse: {}", e);
1683 }
1684 }
1685 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001686 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08001687 vm_control_indices_to_remove.push(index);
1688 } else {
1689 error!("failed to recv VmIrqRequest: {}", e);
1690 }
1691 }
1692 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001693 TaggedControlTube::VmMsync(tube) => {
1694 match tube.recv::<VmMsyncRequest>() {
1695 Ok(request) => {
1696 let response = request.execute(&mut linux.vm);
1697 if let Err(e) = tube.send(&response) {
1698 error!("failed to send VmMsyncResponse: {}", e);
1699 }
1700 }
1701 Err(e) => {
1702 if let TubeError::Disconnected = e {
1703 vm_control_indices_to_remove.push(index);
1704 } else {
1705 error!("failed to recv VmMsyncRequest: {}", e);
1706 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08001707 }
1708 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001709 }
1710 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001711 Ok(request) => {
1712 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07001713 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001714 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001715 error!("failed to send VmResponse: {}", e);
1716 }
1717 }
1718 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001719 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001720 vm_control_indices_to_remove.push(index);
1721 } else {
1722 error!("failed to recv VmResponse: {}", e);
1723 }
1724 }
1725 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001726 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001727 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001728 if !add_tubes.is_empty() {
1729 for (idx, socket) in add_tubes.iter().enumerate() {
1730 wait_ctx
1731 .add(
1732 socket.as_ref(),
1733 Token::VmControl {
1734 index: idx + control_tubes.len(),
1735 },
1736 )
1737 .context(
1738 "failed to add hotplug vfio-pci descriptor ot wait context",
1739 )?;
1740 }
1741 control_tubes.append(&mut add_tubes);
1742 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001743 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001744 }
1745 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001746
Vikram Auradkarede68c72021-07-01 14:33:54 -07001747 // It's possible more data is readable and buffered while the socket is hungup,
1748 // so don't delete the tube from the poll context until we're sure all the
1749 // data is read.
1750 // Below case covers a condition where we have received a hungup event and the tube is not
1751 // readable.
1752 // In case of readable tube, once all data is read, any attempt to read more data on hungup
1753 // tube should fail. On such failure, we get Disconnected error and index gets added to
1754 // vm_control_indices_to_remove by the time we reach here.
1755 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
1756 if let Token::VmControl { index } = event.token {
1757 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001758 }
1759 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001760
1761 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08001762 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07001763 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08001764 vm_control_indices_to_remove.dedup();
1765 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07001766 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
1767 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08001768 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07001769 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08001770 // that has already been closed. Because the token associated with that spurious event
1771 // now belongs to a different socket, the control loop will start to interact with
1772 // sockets that might not be ready to use. This can cause incorrect hangup detection or
1773 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001774 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001775 wait_ctx
1776 .delete(socket)
1777 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08001778 }
1779
1780 // This line implicitly drops the socket at `index` when it gets returned by
1781 // `swap_remove`. After this line, the socket at `index` is not the one from
1782 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07001783 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001784 control_tubes.swap_remove(index);
1785 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07001786 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001787 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001788 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001789 }
1790 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001791 }
1792
Anton Romanov5acc0f52022-01-28 00:18:11 +00001793 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001794 &vcpu_handles,
1795 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001796 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07001797 );
Steven Richman11dc6712020-09-02 15:39:14 -07001798 for (handle, _) in vcpu_handles {
1799 if let Err(e) = handle.join() {
1800 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001801 }
1802 }
1803
Daniel Verkamp94c35272019-09-12 13:31:30 -07001804 // Explicitly drop the VM structure here to allow the devices to clean up before the
1805 // control sockets are closed when this function exits.
1806 mem::drop(linux);
1807
Zach Reizner19ad1f32019-12-12 18:58:50 -08001808 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001809 .set_canon_mode()
1810 .expect("failed to restore canonical mode for terminal");
1811
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001812 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001813}