blob: 93e298197b2e72274a73dc08bec0aa35590bb553 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +08005use std::cmp::{max, Reverse};
Anton Romanov5acc0f52022-01-28 00:18:11 +00006use std::collections::BTreeMap;
7use std::convert::TryInto;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::fs::{File, OpenOptions};
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00009use std::io::prelude::*;
Federico 'Morg' Pareschia1184822021-09-09 10:52:58 +090010use std::io::stdin;
Steven Richmanf32d0b42020-06-20 21:45:32 -070011use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070012use std::mem;
Anton Romanovd43ae3c2022-01-31 17:32:54 +000013#[cfg(feature = "gpu")]
14use std::os::unix::net::UnixStream;
15use std::os::unix::prelude::OpenOptionsExt;
16use std::path::Path;
Dylan Reidb0492662019-05-17 14:50:13 -070017use std::sync::{mpsc, Arc, Barrier};
Hikaru Nishida584e52c2021-04-27 17:37:08 +090018use std::time::Duration;
Dylan Reidb0492662019-05-17 14:50:13 -070019
Vineeth Pillai2b6855e2022-01-12 16:57:22 +000020use std::process;
Anton Romanov5acc0f52022-01-28 00:18:11 +000021#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::thread;
Zach Reizner39aa26b2017-12-12 18:03:23 -080023
Anton Romanov5acc0f52022-01-28 00:18:11 +000024use libc;
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
Tomasz Jeznach42644642020-05-20 23:27:59 -070026use acpi_tables::sdt::SDT;
27
Daniel Verkamp6b298582021-08-16 15:37:11 -070028use anyhow::{anyhow, bail, Context, Result};
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090029use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080030use base::*;
Anton Romanov5acc0f52022-01-28 00:18:11 +000031use devices::serial_device::SerialHardware;
Zide Chenafdb9382021-06-17 12:04:43 -070032use devices::vfio::{VfioCommonSetup, VfioCommonTrait};
Anton Romanovd43ae3c2022-01-31 17:32:54 +000033#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000034use devices::virtio::{self, EventDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080035#[cfg(feature = "audio")]
36use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080037use devices::{
Anton Romanov5acc0f52022-01-28 00:18:11 +000038 self, BusDeviceObj, HostHotPlugKey, HotPlugBus, IrqEventIndex, KvmKernelIrqChip, PciAddress,
39 PciBridge, PciDevice, PcieRootPort, StubPciDevice, VfioContainer, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080040};
Chuanxiao Donga8d427b2022-01-07 10:26:24 +080041use devices::{CoIommuDev, IommuDevType};
Daniel Verkampf1439d42021-05-21 13:55:10 -070042#[cfg(feature = "usb")]
43use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070044use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Anton Romanov5acc0f52022-01-28 00:18:11 +000045use hypervisor::{HypervisorCap, ProtectionType, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070046use minijail::{self, Minijail};
Anton Romanov5acc0f52022-01-28 00:18:11 +000047use resources::{Alloc, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080048use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070049use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080050use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090051use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080052
Keiichi Watanabec5262e92020-10-21 15:57:33 +090053#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
54use crate::gdb::{gdb_thread, GdbStub};
Anton Romanovd43ae3c2022-01-31 17:32:54 +000055use crate::{Config, Executable, SharedDir, SharedDirKind, VfioType};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070056use arch::{
Keiichi Watanabe553d2192021-08-16 16:42:27 +090057 self, LinuxArch, RunnableLinuxVm, VcpuAffinity, VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070058};
Sonny Raoed517d12018-02-13 22:09:43 -080059
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080060#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070061use {
62 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070063 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070064 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
65};
Zach Reizner55a9e502018-10-03 10:22:32 -070066#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070067use {
Steven Richman11dc6712020-09-02 15:39:14 -070068 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
69 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070070 x86_64::X8664arch as Arch,
71};
Zach Reizner39aa26b2017-12-12 18:03:23 -080072
Anton Romanov5acc0f52022-01-28 00:18:11 +000073mod device_helpers;
74use device_helpers::*;
75mod jail_helpers;
76use jail_helpers::*;
77mod vcpu;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090078
David Tolnay2b089fc2019-03-04 15:33:22 -080079#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000080mod gpu;
Chirantan Ekbote44292f52021-06-25 18:31:41 +090081#[cfg(feature = "gpu")]
Anton Romanov5acc0f52022-01-28 00:18:11 +000082use gpu::*;
Jorge E. Moreirad4562d02021-06-28 16:21:12 -070083
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080084// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -070085#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -080086fn create_virtio_devices(
87 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -070088 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -070089 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -070090 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080091 wayland_device_tube: Tube,
92 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +090093 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +000094 balloon_device_tube: Option<Tube>,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +080095 balloon_inflate_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +090096 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080097 disk_device_tubes: &mut Vec<Tube>,
98 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -080099 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800100 fs_device_tubes: &mut Vec<Tube>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800101 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800102) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700103 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800104
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700105 for (_, param) in cfg
106 .serial_parameters
107 .iter()
108 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
109 {
110 let dev = create_console_device(cfg, param)?;
111 devs.push(dev);
112 }
113
Zach Reizner8fb52112017-12-13 16:04:39 -0800114 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800115 let disk_device_tube = disk_device_tubes.remove(0);
116 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800117 }
118
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900119 for blk in &cfg.vhost_user_blk {
120 devs.push(create_vhost_user_block_device(cfg, blk)?);
121 }
122
Federico 'Morg' Pareschi70fc7de2021-04-08 15:43:13 +0900123 for console in &cfg.vhost_user_console {
124 devs.push(create_vhost_user_console_device(cfg, console)?);
125 }
126
Jakub Starona3411ea2019-04-24 10:55:25 -0700127 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800128 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -0800129 devs.push(create_pmem_device(
130 cfg,
131 vm,
132 resources,
133 pmem_disk,
134 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800135 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -0800136 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -0700137 }
138
David Tolnay2b089fc2019-03-04 15:33:22 -0800139 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800140
David Tolnayde6b29a2018-12-20 11:49:46 -0800141 #[cfg(feature = "tpm")]
142 {
David Tolnay43f8e212019-02-13 17:28:16 -0800143 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -0800144 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -0800145 }
David Tolnayde6b29a2018-12-20 11:49:46 -0800146 }
147
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700148 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
149 devs.push(create_single_touch_device(
150 cfg,
151 single_touch_spec,
152 idx as u32,
153 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800154 }
155
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700156 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
157 devs.push(create_multi_touch_device(
158 cfg,
159 multi_touch_spec,
160 idx as u32,
161 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000162 }
163
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700164 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
165 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800166 }
167
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700168 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
169 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800170 }
171
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700172 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
173 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800174 }
175
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700176 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
177 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -0800178 }
179
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800180 for dev_path in &cfg.virtio_input_evdevs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700181 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800182 }
183
Andrew Walbran3cd93602022-01-25 13:59:23 +0000184 if let Some(balloon_device_tube) = balloon_device_tube {
185 devs.push(create_balloon_device(
186 cfg,
187 balloon_device_tube,
188 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +0900189 init_balloon_size,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000190 )?);
191 }
Dylan Reid295ccac2017-11-06 14:06:24 -0800192
Zach Reizner39aa26b2017-12-12 18:03:23 -0800193 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800194 for tap_fd in &cfg.tap_fd {
Alexandre Courbot911773a2021-12-10 14:31:10 +0900195 devs.push(create_tap_net_device_from_fd(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -0800196 }
197
David Tolnay2b089fc2019-03-04 15:33:22 -0800198 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
199 (cfg.host_ip, cfg.netmask, cfg.mac_address)
200 {
Keiichi Watanabe60686582021-03-12 04:53:51 +0900201 if !cfg.vhost_user_net.is_empty() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700202 bail!("vhost-user-net cannot be used with any of --host_ip, --netmask or --mac");
Keiichi Watanabe60686582021-03-12 04:53:51 +0900203 }
Alexandre Courbot911773a2021-12-10 14:31:10 +0900204 devs.push(create_net_device_from_config(
205 cfg,
206 host_ip,
207 netmask,
208 mac_address,
209 )?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800210 }
211
Alexandre Courbot993aa7f2021-12-09 14:51:29 +0900212 for tap_name in &cfg.tap_name {
213 devs.push(create_tap_net_device_from_name(cfg, tap_name.as_bytes())?);
214 }
215
Keiichi Watanabe60686582021-03-12 04:53:51 +0900216 for net in &cfg.vhost_user_net {
217 devs.push(create_vhost_user_net_device(cfg, net)?);
218 }
219
Chirantan Ekbote84091e52021-09-10 18:43:17 +0900220 for vsock in &cfg.vhost_user_vsock {
221 devs.push(create_vhost_user_vsock_device(cfg, vsock)?);
222 }
223
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900224 for opt in &cfg.vhost_user_wl {
225 devs.push(create_vhost_user_wl_device(cfg, opt)?);
226 }
227
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900228 #[cfg(feature = "gpu")]
229 for (opt, (host_tube, device_tube)) in cfg.vhost_user_gpu.iter().zip(vhost_user_gpu_tubes) {
230 devs.push(create_vhost_user_gpu_device(
231 cfg,
232 opt,
233 host_tube,
234 device_tube,
235 )?);
236 }
237
Abhishek Bhardwaj103c1b72021-11-01 15:52:23 -0700238 for opt in &cfg.vvu_proxy {
239 devs.push(create_vvu_proxy_device(cfg, opt)?);
240 }
241
David Tolnayfa701712019-02-13 16:42:54 -0800242 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800243 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900244
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900245 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900246 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800247 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900248
249 #[cfg(feature = "gpu")]
250 {
Jason Macnakcc7070b2019-11-06 14:48:12 -0800251 if cfg.gpu_parameters.is_some() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700252 let (wl_socket, gpu_socket) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900253 resource_bridges.push(gpu_socket);
254 wl_resource_bridge = Some(wl_socket);
255 }
256 }
257
258 devs.push(create_wayland_device(
259 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800260 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900261 wl_resource_bridge,
262 )?);
263 }
David Tolnayfa701712019-02-13 16:42:54 -0800264
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900265 #[cfg(feature = "video-decoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900266 let video_dec_cfg = if let Some(backend) = cfg.video_dec {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700267 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700268 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900269 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700270 } else {
271 None
272 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900273
274 #[cfg(feature = "video-encoder")]
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900275 let video_enc_cfg = if let Some(backend) = cfg.video_enc {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700276 let (video_tube, gpu_tube) = Tube::pair().context("failed to create tube")?;
Daniel Verkampffb59122021-03-18 14:06:15 -0700277 resource_bridges.push(gpu_tube);
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900278 Some((video_tube, backend))
Daniel Verkampffb59122021-03-18 14:06:15 -0700279 } else {
280 None
281 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +0900282
Zach Reizner3a8100a2017-09-13 19:15:43 -0700283 #[cfg(feature = "gpu")]
284 {
Noah Golddc7f52b2020-02-01 13:01:58 -0800285 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Anton Romanov5acc0f52022-01-28 00:18:11 +0000286 let mut gpu_display_w = virtio::DEFAULT_DISPLAY_WIDTH;
287 let mut gpu_display_h = virtio::DEFAULT_DISPLAY_HEIGHT;
Jason Macnakd659a0d2021-03-15 15:33:01 -0700288 if !gpu_parameters.displays.is_empty() {
289 gpu_display_w = gpu_parameters.displays[0].width;
290 gpu_display_h = gpu_parameters.displays[0].height;
291 }
292
Zach Reizner65b98f12019-11-22 17:34:58 -0800293 let mut event_devices = Vec::new();
294 if cfg.display_window_mouse {
295 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700296 UnixStream::pair().context("failed to create socket")?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000297 let (multi_touch_width, multi_touch_height) = cfg
298 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700299 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800300 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000301 .map(|multi_touch_spec| multi_touch_spec.get_size())
Jason Macnakd659a0d2021-03-15 15:33:01 -0700302 .unwrap_or((gpu_display_w, gpu_display_h));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000303 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700304 // u32::MAX is the least likely to collide with the indices generated above for
305 // the multi_touch options, which begin at 0.
306 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800307 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000308 multi_touch_width,
309 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700310 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800311 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700312 .context("failed to set up mouse device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800313 devs.push(VirtioDeviceStub {
314 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700315 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800316 });
317 event_devices.push(EventDevice::touchscreen(event_device_socket));
318 }
319 if cfg.display_window_keyboard {
320 let (event_device_socket, virtio_dev_socket) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700321 UnixStream::pair().context("failed to create socket")?;
Noah Goldd4ca29b2020-10-27 12:21:52 -0700322 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700323 // u32::MAX is the least likely to collide with the indices generated above for
324 // the multi_touch options, which begin at 0.
325 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700326 virtio_dev_socket,
327 virtio::base_features(cfg.protected_vm),
328 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700329 .context("failed to set up keyboard device")?;
Zach Reizner65b98f12019-11-22 17:34:58 -0800330 devs.push(VirtioDeviceStub {
331 dev: Box::new(dev),
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700332 jail: simple_jail(cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -0800333 });
334 event_devices.push(EventDevice::keyboard(event_device_socket));
335 }
Chia-I Wu16fb6592021-11-10 11:45:32 -0800336
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700337 devs.push(create_gpu_device(
338 cfg,
339 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800340 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700341 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900342 // Use the unnamed socket for GPU display screens.
343 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700344 cfg.x_display.clone(),
Chia-I Wu16fb6592021-11-10 11:45:32 -0800345 render_server_fd,
Zach Reizner65b98f12019-11-22 17:34:58 -0800346 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800347 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700348 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -0700349 }
350 }
351
Chih-Yang Hsiae31731c2022-01-05 17:30:28 +0800352 #[cfg(feature = "audio_cras")]
353 {
354 for cras_snd in &cfg.cras_snds {
355 devs.push(create_cras_snd_device(cfg, cras_snd.clone())?);
356 }
357 }
358
Daniel Verkampffb59122021-03-18 14:06:15 -0700359 #[cfg(feature = "video-decoder")]
360 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900361 if let Some((video_dec_tube, video_dec_backend)) = video_dec_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700362 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900363 video_dec_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700364 &mut devs,
365 video_dec_tube,
366 cfg,
367 devices::virtio::VideoDeviceType::Decoder,
368 )?;
369 }
370 }
371
372 #[cfg(feature = "video-encoder")]
373 {
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900374 if let Some((video_enc_tube, video_enc_backend)) = video_enc_cfg {
Daniel Verkampffb59122021-03-18 14:06:15 -0700375 register_video_device(
Alexandre Courbotb42b3e52021-07-09 23:38:57 +0900376 video_enc_backend,
Daniel Verkampffb59122021-03-18 14:06:15 -0700377 &mut devs,
378 video_enc_tube,
379 cfg,
380 devices::virtio::VideoDeviceType::Encoder,
381 )?;
382 }
383 }
384
Zach Reizneraa575662018-08-15 10:46:32 -0700385 if let Some(cid) = cfg.cid {
Chirantan Ekbote3e8d52b2021-09-10 18:27:16 +0900386 devs.push(create_vhost_vsock_device(cfg, cid)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700387 }
388
Woody Chow5890b702021-02-12 14:57:02 +0900389 for vhost_user_fs in &cfg.vhost_user_fs {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700390 devs.push(create_vhost_user_fs_device(cfg, vhost_user_fs)?);
Woody Chow5890b702021-02-12 14:57:02 +0900391 }
392
Woody Chow1b16db12021-04-02 16:59:59 +0900393 #[cfg(feature = "audio")]
394 for vhost_user_snd in &cfg.vhost_user_snd {
395 devs.push(create_vhost_user_snd_device(cfg, vhost_user_snd)?);
396 }
397
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900398 for shared_dir in &cfg.shared_dirs {
399 let SharedDir {
400 src,
401 tag,
402 kind,
403 uid_map,
404 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900405 fs_cfg,
406 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900407 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -0800408
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900409 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900410 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800411 let device_tube = fs_device_tubes.remove(0);
412 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900413 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +0900414 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900415 };
416 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -0800417 }
418
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900419 if let Some(vhost_user_mac80211_hwsim) = &cfg.vhost_user_mac80211_hwsim {
420 devs.push(create_vhost_user_mac80211_hwsim_device(
421 cfg,
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700422 vhost_user_mac80211_hwsim,
JaeMan Parkeb9cc532021-07-02 15:02:59 +0900423 )?);
424 }
425
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700426 #[cfg(feature = "audio")]
427 if let Some(path) = &cfg.sound {
Daniel Verkamp166d1dd2021-08-19 17:05:29 -0700428 devs.push(create_sound_device(path, cfg)?);
Jorge E. Moreirad4562d02021-06-28 16:21:12 -0700429 }
430
David Tolnay2b089fc2019-03-04 15:33:22 -0800431 Ok(devs)
432}
433
434fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -0600435 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -0700436 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -0700437 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -0700438 exit_evt: &Event,
Zide Chen71435c12021-03-03 15:02:02 -0800439 phys_max_addr: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800440 control_tubes: &mut Vec<TaggedControlTube>,
441 wayland_device_tube: Tube,
442 gpu_device_tube: Tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900443 vhost_user_gpu_tubes: Vec<(Tube, Tube)>,
Andrew Walbran3cd93602022-01-25 13:59:23 +0000444 balloon_device_tube: Option<Tube>,
David Stevens06d157a2022-01-13 23:44:48 +0900445 init_balloon_size: u64,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800446 disk_device_tubes: &mut Vec<Tube>,
447 pmem_device_tubes: &mut Vec<Tube>,
448 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -0700449 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800450 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800451 #[cfg(feature = "gpu")] render_server_fd: Option<SafeDescriptor>,
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000452) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800453 let mut devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)> = Vec::new();
454 let mut balloon_inflate_tube: Option<Tube> = None;
Zide Chen5deee482021-04-19 11:06:01 -0700455 if !cfg.vfio.is_empty() {
Zide Chendfc4b882021-03-10 16:35:37 -0800456 let mut iommu_attached_endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> =
457 BTreeMap::new();
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800458 let mut coiommu_attached_endpoints = Vec::new();
Zide Chendfc4b882021-03-10 16:35:37 -0800459
Tomasz Nowicki71aca792021-06-09 18:53:49 +0000460 for vfio_dev in cfg
461 .vfio
462 .iter()
463 .filter(|dev| dev.get_type() == VfioType::Pci)
464 {
465 let vfio_path = &vfio_dev.vfio_path;
Zide Chen5deee482021-04-19 11:06:01 -0700466 let (vfio_pci_device, jail) = create_vfio_device(
467 cfg,
468 vm,
469 resources,
470 control_tubes,
471 vfio_path.as_path(),
Xiong Zhangf82f2dc2021-05-21 16:54:12 +0800472 None,
Zide Chendfc4b882021-03-10 16:35:37 -0800473 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800474 Some(&mut coiommu_attached_endpoints),
475 vfio_dev.iommu_dev_type(),
Zide Chen5deee482021-04-19 11:06:01 -0700476 )?;
Zide Chendfc4b882021-03-10 16:35:37 -0800477
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000478 devices.push((vfio_pci_device, jail));
Zide Chen5deee482021-04-19 11:06:01 -0700479 }
Zide Chendfc4b882021-03-10 16:35:37 -0800480
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000481 for vfio_dev in cfg
482 .vfio
483 .iter()
484 .filter(|dev| dev.get_type() == VfioType::Platform)
485 {
486 let vfio_path = &vfio_dev.vfio_path;
487 let (vfio_plat_dev, jail) = create_vfio_platform_device(
488 cfg,
489 vm,
490 resources,
491 control_tubes,
492 vfio_path.as_path(),
493 &mut iommu_attached_endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800494 IommuDevType::NoIommu, // Virtio IOMMU is not supported yet
Tomasz Nowicki344eb142021-09-22 05:51:58 +0000495 )?;
496
497 devices.push((Box::new(vfio_plat_dev), jail));
498 }
499
Chuanxiao Dongcb03ec62022-01-20 08:25:38 +0800500 if !coiommu_attached_endpoints.is_empty() || !iommu_attached_endpoints.is_empty() {
501 let mut buf = mem::MaybeUninit::<libc::rlimit>::zeroed();
502 let res = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, buf.as_mut_ptr()) };
503 if res == 0 {
504 let limit = unsafe { buf.assume_init() };
505 let rlim_new = limit
506 .rlim_cur
507 .saturating_add(vm.get_memory().memory_size() as libc::rlim_t);
508 let rlim_max = max(limit.rlim_max, rlim_new);
509 if limit.rlim_cur < rlim_new {
510 let limit_arg = libc::rlimit {
511 rlim_cur: rlim_new as libc::rlim_t,
512 rlim_max: rlim_max as libc::rlim_t,
513 };
514 let res = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &limit_arg) };
515 if res != 0 {
516 bail!("Set rlimit failed");
517 }
518 }
519 } else {
520 bail!("Get rlimit failed");
521 }
522 }
523
Zide Chendfc4b882021-03-10 16:35:37 -0800524 if !iommu_attached_endpoints.is_empty() {
Zide Chen71435c12021-03-03 15:02:02 -0800525 let iommu_dev = create_iommu_device(cfg, phys_max_addr, iommu_attached_endpoints)?;
Zide Chendfc4b882021-03-10 16:35:37 -0800526
Daniel Verkamp6b298582021-08-16 15:37:11 -0700527 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
Zide Chendfc4b882021-03-10 16:35:37 -0800528 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Peter Fangad3b24e2021-06-21 00:43:29 -0700529 let mut dev =
530 VirtioPciDevice::new(vm.get_memory().clone(), iommu_dev.dev, msi_device_tube)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700531 .context("failed to create virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -0700532 // early reservation for viommu.
533 dev.allocate_address(resources)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700534 .context("failed to allocate resources early for virtio pci dev")?;
Peter Fangad3b24e2021-06-21 00:43:29 -0700535 let dev = Box::new(dev);
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000536 devices.push((dev, iommu_dev.jail));
Zide Chendfc4b882021-03-10 16:35:37 -0800537 }
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800538
539 if !coiommu_attached_endpoints.is_empty() {
540 let vfio_container =
541 VfioCommonSetup::vfio_get_container(IommuDevType::CoIommu, None as Option<&Path>)
542 .context("failed to get vfio container")?;
543 let (coiommu_host_tube, coiommu_device_tube) =
544 Tube::pair().context("failed to create coiommu tube")?;
545 control_tubes.push(TaggedControlTube::VmMemory(coiommu_host_tube));
546 let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u64;
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800547 let (coiommu_tube, balloon_tube) =
548 Tube::pair().context("failed to create coiommu tube")?;
549 balloon_inflate_tube = Some(balloon_tube);
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800550 let dev = CoIommuDev::new(
551 vm.get_memory().clone(),
552 vfio_container,
553 coiommu_device_tube,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800554 coiommu_tube,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800555 coiommu_attached_endpoints,
556 vcpu_count,
Chuanxiao Dongd4468612022-01-14 14:21:17 +0800557 cfg.coiommu_param.unwrap_or_default(),
Chuanxiao Donga8d427b2022-01-07 10:26:24 +0800558 )
559 .context("failed to create coiommu device")?;
560
561 devices.push((Box::new(dev), simple_jail(cfg, "coiommu")?));
562 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800563 }
564
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800565 let stubs = create_virtio_devices(
566 cfg,
567 vm,
568 resources,
569 exit_evt,
570 wayland_device_tube,
571 gpu_device_tube,
572 vhost_user_gpu_tubes,
573 balloon_device_tube,
574 balloon_inflate_tube,
David Stevens06d157a2022-01-13 23:44:48 +0900575 init_balloon_size,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800576 disk_device_tubes,
577 pmem_device_tubes,
578 map_request,
579 fs_device_tubes,
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800580 #[cfg(feature = "gpu")]
581 render_server_fd,
Chuanxiao Dong146a13b2021-12-09 12:59:54 +0800582 )?;
583
584 for stub in stubs {
585 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
586 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
587 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
588 .context("failed to create virtio pci dev")?;
589 let dev = Box::new(dev) as Box<dyn BusDeviceObj>;
590 devices.push((dev, stub.jail));
591 }
592
593 #[cfg(feature = "audio")]
594 for ac97_param in &cfg.ac97_parameters {
595 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
596 .context("failed to create ac97 device")?;
597 let jail = simple_jail(cfg, dev.minijail_policy())?;
598 devices.push((Box::new(dev), jail));
599 }
600
601 #[cfg(feature = "usb")]
602 {
603 // Create xhci controller.
604 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
605 devices.push((usb_controller, simple_jail(cfg, "xhci")?));
606 }
607
Mattias Nisslerde2c6402021-10-21 12:05:29 +0000608 for params in &cfg.stub_pci_devices {
609 // Stub devices don't need jailing since they don't do anything.
610 devices.push((Box::new(StubPciDevice::new(params)), None));
611 }
612
Tomasz Nowickiab86d522021-09-22 05:50:46 +0000613 Ok(devices)
David Tolnay2b089fc2019-03-04 15:33:22 -0800614}
615
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000616fn create_file_backed_mappings(
617 cfg: &Config,
618 vm: &mut impl Vm,
619 resources: &mut SystemAllocator,
620) -> Result<()> {
621 for mapping in &cfg.file_backed_mappings {
622 let file = OpenOptions::new()
623 .read(true)
624 .write(mapping.writable)
625 .custom_flags(if mapping.sync { libc::O_SYNC } else { 0 })
626 .open(&mapping.path)
627 .context("failed to open file for file-backed mapping")?;
628 let prot = if mapping.writable {
629 Protection::read_write()
630 } else {
631 Protection::read()
632 };
633 let size = mapping
634 .size
635 .try_into()
636 .context("Invalid size for file-backed mapping")?;
637 let memory_mapping = MemoryMappingBuilder::new(size)
638 .from_file(&file)
639 .offset(mapping.offset)
640 .protection(prot)
641 .build()
642 .context("failed to map backing file for file-backed mapping")?;
643
644 resources
645 .mmio_allocator_any()
646 .allocate_at(
647 mapping.address,
648 mapping.size,
649 Alloc::FileBacked(mapping.address),
650 "file-backed mapping".to_owned(),
651 )
652 .context("failed to allocate guest address for file-backed mapping")?;
653
654 vm.add_memory_region(
655 GuestAddress(mapping.address),
656 Box::new(memory_mapping),
657 !mapping.writable,
658 /* log_dirty_pages = */ false,
659 )
660 .context("failed to configure file-backed mapping")?;
661 }
662
663 Ok(())
664}
665
Zach Reiznera90649a2021-03-31 12:56:08 -0700666fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800667 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100668 Some(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900669 open_file(
670 initrd_path,
671 true, /*read_only*/
672 false, /*O_DIRECT*/
673 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700674 .with_context(|| format!("failed to open initrd {}", initrd_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100675 )
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800676 } else {
677 None
678 };
679
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700680 let vm_image = match cfg.executable_path {
Andrew Walbranbc55e302021-07-13 17:35:10 +0100681 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900682 open_file(
683 kernel_path,
684 true, /*read_only*/
685 false, /*O_DIRECT*/
686 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700687 .with_context(|| format!("failed to open kernel image {}", kernel_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100688 ),
689 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
Junichi Uekawa7bea39f2021-07-16 14:05:06 +0900690 open_file(bios_path, true /*read_only*/, false /*O_DIRECT*/)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700691 .with_context(|| format!("failed to open bios {}", bios_path.display()))?,
Andrew Walbranbc55e302021-07-13 17:35:10 +0100692 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700693 _ => panic!("Did not receive a bios or kernel, should be impossible."),
694 };
695
Will Deaconc48e7832021-07-30 19:03:06 +0100696 let swiotlb = if let Some(size) = cfg.swiotlb {
697 Some(
698 size.checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700699 .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100700 )
701 } else {
702 match cfg.protected_vm {
Andrew Walbran0bbbb682021-12-13 13:42:07 +0000703 ProtectionType::Protected | ProtectionType::ProtectedWithoutFirmware => {
704 Some(64 * 1024 * 1024)
705 }
Will Deaconc48e7832021-07-30 19:03:06 +0100706 ProtectionType::Unprotected => None,
707 }
708 };
709
Zach Reiznera90649a2021-03-31 12:56:08 -0700710 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -0800711 memory_size: cfg
712 .memory
713 .unwrap_or(256)
714 .checked_mul(1024 * 1024)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700715 .ok_or_else(|| anyhow!("requested memory size too large"))?,
Will Deaconc48e7832021-07-30 19:03:06 +0100716 swiotlb,
Dylan Reid059a1882018-07-23 17:58:09 -0700717 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -0700718 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -0700719 cpu_clusters: cfg.cpu_clusters.clone(),
720 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +0900721 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +0900722 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700723 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800724 android_fstab: cfg
725 .android_fstab
726 .as_ref()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700727 .map(|x| {
728 File::open(x)
729 .with_context(|| format!("failed to open android fstab file {}", x.display()))
730 })
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800731 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +0900732 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800733 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -0700734 extra_kernel_params: cfg.params.clone(),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700735 acpi_sdts: cfg
736 .acpi_tables
737 .iter()
Daniel Verkamp6b298582021-08-16 15:37:11 -0700738 .map(|path| {
739 SDT::from_file(path)
740 .with_context(|| format!("failed to open ACPI file {}", path.display()))
741 })
Tomasz Jeznach42644642020-05-20 23:27:59 -0700742 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +0900743 rt_cpus: cfg.rt_cpus.clone(),
Suleiman Souhlal63630e82021-02-18 11:53:11 +0900744 delay_rt: cfg.delay_rt,
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100745 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900746 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -0700747 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700748 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -0700749 no_legacy: cfg.no_legacy,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +0800750 host_cpu_topology: cfg.host_cpu_topology,
Zach Reiznera90649a2021-03-31 12:56:08 -0700751 })
752}
753
/// Outcome reported by [`run_config`] once the VM run has finished.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExitState {
    /// The run ended with a reset.
    Reset,
    /// The run ended with a stop.
    Stop,
    /// The run ended with a crash.
    Crash,
}
760
761pub fn run_config(cfg: Config) -> Result<ExitState> {
Zach Reiznerdc748482021-04-14 13:59:30 -0700762 let components = setup_vm_components(&cfg)?;
763
764 let guest_mem_layout =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700765 Arch::guest_memory_layout(&components).context("failed to create guest memory layout")?;
766 let guest_mem = GuestMemory::new(&guest_mem_layout).context("failed to create guest memory")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700767 let mut mem_policy = MemoryPolicy::empty();
768 if components.hugepages {
769 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
770 }
Quentin Perret26203802021-12-02 09:48:43 +0000771 guest_mem.set_memory_policy(mem_policy);
Daniel Verkamp6b298582021-08-16 15:37:11 -0700772 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).context("failed to create kvm")?;
Andrew Walbran00f1c9f2021-12-10 17:13:08 +0000773 let vm = KvmVm::new(&kvm, guest_mem, components.protected_vm).context("failed to create vm")?;
Andrew Walbrane79aba12022-01-27 14:12:35 +0000774 // Check that the VM was actually created in protected mode as expected.
775 if cfg.protected_vm != ProtectionType::Unprotected && !vm.check_capability(VmCap::Protected) {
776 bail!("Failed to create protected VM");
777 }
Daniel Verkamp6b298582021-08-16 15:37:11 -0700778 let vm_clone = vm.try_clone().context("failed to clone vm")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700779
780 enum KvmIrqChip {
781 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
782 Split(KvmSplitIrqChip),
783 Kernel(KvmKernelIrqChip),
784 }
785
786 impl KvmIrqChip {
787 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
788 match self {
789 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
790 KvmIrqChip::Split(i) => i,
791 KvmIrqChip::Kernel(i) => i,
792 }
793 }
794 }
795
796 let ioapic_host_tube;
797 let mut irq_chip = if cfg.split_irqchip {
798 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
799 unimplemented!("KVM split irqchip mode only supported on x86 processors");
800 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
801 {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700802 let (host_tube, ioapic_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerdc748482021-04-14 13:59:30 -0700803 ioapic_host_tube = Some(host_tube);
804 KvmIrqChip::Split(
805 KvmSplitIrqChip::new(
806 vm_clone,
807 components.vcpu_count,
808 ioapic_device_tube,
809 Some(120),
810 )
Daniel Verkamp6b298582021-08-16 15:37:11 -0700811 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700812 )
813 }
814 } else {
815 ioapic_host_tube = None;
816 KvmIrqChip::Kernel(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700817 KvmKernelIrqChip::new(vm_clone, components.vcpu_count)
818 .context("failed to create IRQ chip")?,
Zach Reiznerdc748482021-04-14 13:59:30 -0700819 )
820 };
821
822 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
823}
824
825fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -0700826 cfg: Config,
827 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -0700828 mut vm: V,
829 irq_chip: &mut dyn IrqChipArch,
830 ioapic_host_tube: Option<Tube>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800831) -> Result<ExitState>
Zach Reiznera90649a2021-03-31 12:56:08 -0700832where
833 Vcpu: VcpuArch + 'static,
834 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -0700835{
836 if cfg.sandbox {
837 // Printing something to the syslog before entering minijail so that libc's syslogger has a
838 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
839 // access to those files will not be possible.
840 info!("crosvm entering multiprocess mode");
841 }
842
Daniel Verkampf1439d42021-05-21 13:55:10 -0700843 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -0700844 let (usb_control_tube, usb_provider) =
Daniel Verkamp6b298582021-08-16 15:37:11 -0700845 HostBackendDeviceProvider::new().context("failed to create usb provider")?;
Daniel Verkampf1439d42021-05-21 13:55:10 -0700846
Zach Reiznera90649a2021-03-31 12:56:08 -0700847 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
848 // before any jailed devices have been spawned, so that we can catch any of them that fail very
849 // quickly.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700850 let sigchld_fd = SignalFd::new(libc::SIGCHLD).context("failed to create signalfd")?;
Dylan Reid059a1882018-07-23 17:58:09 -0700851
Zach Reiznera60744b2019-02-13 17:33:32 -0800852 let control_server_socket = match &cfg.socket_path {
853 Some(path) => Some(UnlinkUnixSeqpacketListener(
Daniel Verkamp6b298582021-08-16 15:37:11 -0700854 UnixSeqpacketListener::bind(path).context("failed to create control server")?,
Zach Reiznera60744b2019-02-13 17:33:32 -0800855 )),
856 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -0700857 };
Zach Reiznera60744b2019-02-13 17:33:32 -0800858
Zach Reiznera90649a2021-03-31 12:56:08 -0700859 let mut control_tubes = Vec::new();
860
861 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
862 if let Some(port) = cfg.gdb {
863 // GDB needs a control socket to interrupt vcpus.
Daniel Verkamp6b298582021-08-16 15:37:11 -0700864 let (gdb_host_tube, gdb_control_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznera90649a2021-03-31 12:56:08 -0700865 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
866 components.gdb = Some((port, gdb_control_tube));
867 }
868
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900869 for wl_cfg in &cfg.vhost_user_wl {
870 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
871 .map(Tube::new)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700872 .context("failed to connect to wayland tube")?;
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900873 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
874 }
875
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900876 let mut vhost_user_gpu_tubes = Vec::with_capacity(cfg.vhost_user_gpu.len());
877 for _ in 0..cfg.vhost_user_gpu.len() {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700878 let (host_tube, device_tube) = Tube::pair().context("failed to create tube")?;
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900879 vhost_user_gpu_tubes.push((
Daniel Verkamp6b298582021-08-16 15:37:11 -0700880 host_tube.try_clone().context("failed to clone tube")?,
Chirantan Ekbote44292f52021-06-25 18:31:41 +0900881 device_tube,
882 ));
883 control_tubes.push(TaggedControlTube::VmMemory(host_tube));
884 }
885
Daniel Verkamp6b298582021-08-16 15:37:11 -0700886 let (wayland_host_tube, wayland_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800887 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Andrew Walbran3cd93602022-01-25 13:59:23 +0000888
889 let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
David Stevens8be9ef02022-01-13 22:50:24 +0900890 if let Some(ref path) = cfg.balloon_control {
891 (
892 None,
893 Some(Tube::new(
894 UnixSeqpacket::connect(path).context("failed to create balloon control")?,
895 )),
896 )
897 } else {
898 // Balloon gets a special socket so balloon requests can be forwarded
899 // from the main process.
900 let (host, device) = Tube::pair().context("failed to create tube")?;
901 // Set recv timeout to avoid deadlock on sending BalloonControlCommand
902 // before the guest is ready.
903 host.set_recv_timeout(Some(Duration::from_millis(100)))
904 .context("failed to set timeout")?;
905 (Some(host), Some(device))
906 }
Andrew Walbran3cd93602022-01-25 13:59:23 +0000907 } else {
908 (None, None)
909 };
Dylan Reid059a1882018-07-23 17:58:09 -0700910
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800911 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800912 let mut disk_device_tubes = Vec::new();
913 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800914 let disk_count = cfg.disks.len();
915 for _ in 0..disk_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700916 let (disk_host_tub, disk_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800917 disk_host_tubes.push(disk_host_tub);
918 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800919 }
920
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800921 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -0800922 let pmem_count = cfg.pmem_devices.len();
923 for _ in 0..pmem_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700924 let (pmem_host_tube, pmem_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800925 pmem_device_tubes.push(pmem_device_tube);
926 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -0800927 }
928
Daniel Verkamp6b298582021-08-16 15:37:11 -0700929 let (gpu_host_tube, gpu_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800930 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -0700931
Zach Reiznerdc748482021-04-14 13:59:30 -0700932 if let Some(ioapic_host_tube) = ioapic_host_tube {
933 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
934 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +0800935
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +0800936 let battery = if cfg.battery_type.is_some() {
Daniel Verkampcfe49462021-08-19 17:11:05 -0700937 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(clippy::manual_map))]
Alex Lauf408c732020-11-10 18:24:04 +0900938 let jail = match simple_jail(&cfg, "battery")? {
Daniel Verkampcfe49462021-08-19 17:11:05 -0700939 #[cfg_attr(not(feature = "power-monitor-powerd"), allow(unused_mut))]
Alex Lauf408c732020-11-10 18:24:04 +0900940 Some(mut jail) => {
941 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
942 #[cfg(feature = "power-monitor-powerd")]
943 {
Fergus Dall51200512021-08-19 12:54:26 +1000944 add_current_user_to_jail(&mut jail)?;
Alex Lauf408c732020-11-10 18:24:04 +0900945
946 // Create a tmpfs in the device's root directory so that we can bind mount files.
947 jail.mount_with_data(
948 Path::new("none"),
949 Path::new("/"),
950 "tmpfs",
951 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
952 "size=67108864",
953 )?;
954
955 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
956 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
957 }
958 Some(jail)
959 }
960 None => None,
961 };
962 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +0800963 } else {
964 (&cfg.battery_type, None)
965 };
966
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -0800967 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
968
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900969 let fs_count = cfg
970 .shared_dirs
971 .iter()
972 .filter(|sd| sd.kind == SharedDirKind::FS)
973 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800974 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900975 for _ in 0..fs_count {
Daniel Verkamp6b298582021-08-16 15:37:11 -0700976 let (fs_host_tube, fs_device_tube) = Tube::pair().context("failed to create tube")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800977 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
978 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +0900979 }
980
Daniel Verkamp6b298582021-08-16 15:37:11 -0700981 let exit_evt = Event::new().context("failed to create event")?;
Dmitry Torokhovf75699f2021-12-03 11:19:13 -0800982 let reset_evt = Event::new().context("failed to create event")?;
Andrew Walbran1a19c672022-01-24 17:24:10 +0000983 let crash_evt = Event::new().context("failed to create event")?;
Daniel Verkamp6f4f8222022-01-05 14:09:09 -0800984 let mut sys_allocator = Arch::create_system_allocator(&vm);
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +0900985
986 // Allocate the ramoops region first. AArch64::build_vm() assumes this.
987 let ramoops_region = match &components.pstore {
988 Some(pstore) => Some(
Dennis Kempin65740a62021-10-18 16:46:57 -0700989 arch::pstore::create_memory_region(&mut vm, &mut sys_allocator, pstore)
Daniel Verkamp6b298582021-08-16 15:37:11 -0700990 .context("failed to allocate pstore region")?,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +0900991 ),
992 None => None,
993 };
994
Mattias Nisslerbbd91d02021-12-07 08:57:45 +0000995 create_file_backed_mappings(&cfg, &mut vm, &mut sys_allocator)?;
996
Daniel Verkamp891ea3e2022-01-04 12:35:55 -0800997 let phys_max_addr = (1u64 << vm.get_guest_phys_addr_bits()) - 1;
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -0800998
999 #[cfg(feature = "gpu")]
1000 // Hold on to the render server jail so it keeps running until we exit run_vm()
1001 let mut _render_server_jail = None;
1002 #[cfg(feature = "gpu")]
1003 let mut render_server_fd = None;
1004 #[cfg(feature = "gpu")]
1005 if let Some(gpu_parameters) = &cfg.gpu_parameters {
1006 if let Some(ref render_server_parameters) = gpu_parameters.render_server {
1007 let (jail, fd) = start_gpu_render_server(&cfg, render_server_parameters)?;
1008 _render_server_jail = Some(ScopedMinijail(jail));
1009 render_server_fd = Some(fd);
1010 }
1011 }
1012
David Stevens06d157a2022-01-13 23:44:48 +09001013 let init_balloon_size = components
1014 .memory_size
1015 .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
1016 m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
1017 }))
1018 .context("failed to calculate init balloon size")?;
1019
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001020 let mut devices = create_devices(
Zach Reiznerdc748482021-04-14 13:59:30 -07001021 &cfg,
1022 &mut vm,
1023 &mut sys_allocator,
1024 &exit_evt,
Zide Chen71435c12021-03-03 15:02:02 -08001025 phys_max_addr,
Zach Reiznerdc748482021-04-14 13:59:30 -07001026 &mut control_tubes,
1027 wayland_device_tube,
1028 gpu_device_tube,
Chirantan Ekbote44292f52021-06-25 18:31:41 +09001029 vhost_user_gpu_tubes,
Zach Reiznerdc748482021-04-14 13:59:30 -07001030 balloon_device_tube,
David Stevens06d157a2022-01-13 23:44:48 +09001031 init_balloon_size,
Zach Reiznerdc748482021-04-14 13:59:30 -07001032 &mut disk_device_tubes,
1033 &mut pmem_device_tubes,
1034 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001035 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07001036 usb_provider,
1037 Arc::clone(&map_request),
Dmitry Torokhov9cbe5432022-01-25 19:17:07 -08001038 #[cfg(feature = "gpu")]
1039 render_server_fd,
Zach Reiznerdc748482021-04-14 13:59:30 -07001040 )?;
1041
Peter Fangc2bba082021-04-19 18:40:24 -07001042 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001043 for device in devices
1044 .iter_mut()
1045 .filter_map(|(dev, _)| dev.as_pci_device_mut())
1046 {
Peter Fangc2bba082021-04-19 18:40:24 -07001047 let sdts = device
1048 .generate_acpi(components.acpi_sdts)
1049 .or_else(|| {
1050 error!("ACPI table generation error");
1051 None
1052 })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001053 .ok_or_else(|| anyhow!("failed to generate ACPI table"))?;
Peter Fangc2bba082021-04-19 18:40:24 -07001054 components.acpi_sdts = sdts;
1055 }
1056
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001057 // KVM_CREATE_VCPU uses apic id for x86 and uses cpu id for others.
1058 let mut kvm_vcpu_ids = Vec::new();
1059
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08001060 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07001061 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06001062 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07001063 &exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001064 &reset_evt,
Zach Reiznerdc748482021-04-14 13:59:30 -07001065 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06001066 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001067 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08001068 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07001069 vm,
Ryo Hashimoto8f9dc1d2021-08-18 19:07:29 +09001070 ramoops_region,
Tomasz Nowickiab86d522021-09-22 05:50:46 +00001071 devices,
Zach Reiznerdc748482021-04-14 13:59:30 -07001072 irq_chip,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001073 &mut kvm_vcpu_ids,
Trent Begin17ccaad2019-04-17 13:51:25 -06001074 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001075 .context("the architecture failed to build the vm")?;
Lepton Wu60893882018-11-21 11:06:18 -08001076
Daniel Verkamp1286b482021-11-30 15:14:16 -08001077 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1078 {
1079 // Create Pcie Root Port
1080 let pcie_root_port = Arc::new(Mutex::new(PcieRootPort::new()));
1081 let (msi_host_tube, msi_device_tube) = Tube::pair().context("failed to create tube")?;
1082 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
1083 let sec_bus = (1..255)
1084 .find(|&bus_num| sys_allocator.pci_bus_empty(bus_num))
1085 .context("failed to find empty bus for Pci hotplug")?;
1086 let pci_bridge = Box::new(PciBridge::new(
1087 pcie_root_port.clone(),
1088 msi_device_tube,
1089 0,
1090 sec_bus,
1091 ));
1092 Arch::register_pci_device(&mut linux, pci_bridge, None, &mut sys_allocator)
1093 .context("Failed to configure pci bridge device")?;
1094 linux.hotplug_bus.push(pcie_root_port);
1095 }
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001096
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001097 #[cfg(feature = "direct")]
1098 if let Some(pmio) = &cfg.direct_pmio {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001099 let direct_io = Arc::new(
1100 devices::DirectIo::new(&pmio.path, false).context("failed to open direct io device")?,
1101 );
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001102 for range in pmio.ranges.iter() {
1103 linux
1104 .io_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001105 .insert_sync(direct_io.clone(), range.base, range.len)
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08001106 .unwrap();
1107 }
1108 };
1109
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001110 #[cfg(feature = "direct")]
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001111 if let Some(mmio) = &cfg.direct_mmio {
Xiong Zhang46471a02021-11-12 00:34:42 +08001112 let direct_mmio = Arc::new(
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001113 devices::DirectMmio::new(&mmio.path, false, &mmio.ranges)
Xiong Zhang46471a02021-11-12 00:34:42 +08001114 .context("failed to open direct mmio device")?,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001115 );
Xiong Zhang46471a02021-11-12 00:34:42 +08001116
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001117 for range in mmio.ranges.iter() {
1118 linux
1119 .mmio_bus
Junichi Uekawab180f9c2021-12-07 09:21:36 +09001120 .insert_sync(direct_mmio.clone(), range.base, range.len)
Tomasz Jeznach9e6c6332021-05-27 21:49:14 -07001121 .unwrap();
1122 }
1123 };
1124
1125 #[cfg(feature = "direct")]
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001126 let mut irqs = Vec::new();
1127
1128 #[cfg(feature = "direct")]
1129 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07001130 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001131 warn!("irq {} already reserved.", irq);
1132 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001133 let trigger = Event::new().context("failed to create event")?;
1134 let resample = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001135 linux
1136 .irq_chip
1137 .register_irq_event(*irq, &trigger, Some(&resample))
1138 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001139 let direct_irq = devices::DirectIrq::new(trigger, Some(resample))
1140 .context("failed to enable interrupt forwarding")?;
1141 direct_irq
1142 .irq_enable(*irq)
1143 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001144 irqs.push(direct_irq);
1145 }
1146
1147 #[cfg(feature = "direct")]
1148 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07001149 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001150 warn!("irq {} already reserved.", irq);
1151 }
Daniel Verkamp6b298582021-08-16 15:37:11 -07001152 let trigger = Event::new().context("failed to create event")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001153 linux
1154 .irq_chip
1155 .register_irq_event(*irq, &trigger, None)
1156 .unwrap();
Daniel Verkamp6b298582021-08-16 15:37:11 -07001157 let direct_irq = devices::DirectIrq::new(trigger, None)
1158 .context("failed to enable interrupt forwarding")?;
1159 direct_irq
1160 .irq_enable(*irq)
1161 .context("failed to enable interrupt forwarding")?;
Tomasz Jeznach7271f752021-03-04 01:44:06 -08001162 irqs.push(direct_irq);
1163 }
1164
Daniel Verkamp6b298582021-08-16 15:37:11 -07001165 let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001166 run_control(
1167 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07001168 sys_allocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001169 cfg,
Zach Reiznera60744b2019-02-13 17:33:32 -08001170 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001171 control_tubes,
1172 balloon_host_tube,
1173 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001174 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001175 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001176 exit_evt,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001177 reset_evt,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001178 crash_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001179 sigchld_fd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001180 Arc::clone(&map_request),
Gurchetan Singh293913c2020-12-09 10:44:13 -08001181 gralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001182 kvm_vcpu_ids,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001183 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001184}
1185
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001186fn get_hp_bus<V: VmArch, Vcpu: VcpuArch>(
1187 linux: &RunnableLinuxVm<V, Vcpu>,
1188 host_addr: PciAddress,
1189) -> Result<(Arc<Mutex<dyn HotPlugBus>>, u8)> {
1190 for hp_bus in linux.hotplug_bus.iter() {
1191 if let Some(number) = hp_bus.lock().is_match(host_addr) {
1192 return Ok((hp_bus.clone(), number));
1193 }
1194 }
1195 Err(anyhow!("Failed to find a suitable hotplug bus"))
1196}
1197
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001198fn add_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1199 linux: &mut RunnableLinuxVm<V, Vcpu>,
1200 sys_allocator: &mut SystemAllocator,
1201 cfg: &Config,
1202 control_tubes: &mut Vec<TaggedControlTube>,
1203 vfio_path: &Path,
1204) -> Result<()> {
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001205 let host_os_str = vfio_path
1206 .file_name()
1207 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1208 let host_str = host_os_str
1209 .to_str()
1210 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1211 let host_addr = PciAddress::from_string(host_str);
1212
1213 let (hp_bus, bus_num) = get_hp_bus(linux, host_addr)?;
1214
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001215 let mut endpoints: BTreeMap<u32, Arc<Mutex<VfioContainer>>> = BTreeMap::new();
1216 let (vfio_pci_device, jail) = create_vfio_device(
1217 cfg,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001218 &linux.vm,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001219 sys_allocator,
1220 control_tubes,
1221 vfio_path,
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001222 Some(bus_num),
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001223 &mut endpoints,
Chuanxiao Donga8d427b2022-01-07 10:26:24 +08001224 None,
1225 IommuDevType::NoIommu,
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001226 )?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001227
1228 let pci_address = Arch::register_pci_device(linux, vfio_pci_device, jail, sys_allocator)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001229 .context("Failed to configure pci hotplug device")?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001230
Daniel Verkamp6b298582021-08-16 15:37:11 -07001231 let host_os_str = vfio_path
1232 .file_name()
1233 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1234 let host_str = host_os_str
1235 .to_str()
1236 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001237 let host_addr = PciAddress::from_string(host_str);
1238 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001239 let mut hp_bus = hp_bus.lock();
1240 hp_bus.add_hotplug_device(host_key, pci_address);
1241 hp_bus.hot_plug(pci_address);
1242 Ok(())
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001243}
1244
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001245fn remove_vfio_device<V: VmArch, Vcpu: VcpuArch>(
1246 linux: &RunnableLinuxVm<V, Vcpu>,
Xiong Zhang2d45b912021-05-13 16:22:25 +08001247 sys_allocator: &mut SystemAllocator,
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001248 vfio_path: &Path,
1249) -> Result<()> {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001250 let host_os_str = vfio_path
1251 .file_name()
1252 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
1253 let host_str = host_os_str
1254 .to_str()
1255 .ok_or_else(|| anyhow!("failed to parse or find vfio path"))?;
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001256 let host_addr = PciAddress::from_string(host_str);
1257 let host_key = HostHotPlugKey::Vfio { host_addr };
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001258 for hp_bus in linux.hotplug_bus.iter() {
1259 let mut hp_bus_lock = hp_bus.lock();
1260 if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(host_key) {
1261 hp_bus_lock.hot_unplug(pci_addr);
Xiong Zhang2d45b912021-05-13 16:22:25 +08001262 sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
Xiong Zhangf82f2dc2021-05-21 16:54:12 +08001263 return Ok(());
1264 }
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001265 }
1266
Daniel Verkamp6b298582021-08-16 15:37:11 -07001267 Err(anyhow!("HotPlugBus hasn't been implemented"))
Xiong Zhang9fadc3f2021-06-07 14:16:45 +08001268}
Xiong Zhang8c9fe3e2021-04-12 15:07:17 +08001269
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001270fn handle_vfio_command<V: VmArch, Vcpu: VcpuArch>(
1271 linux: &mut RunnableLinuxVm<V, Vcpu>,
1272 sys_allocator: &mut SystemAllocator,
1273 cfg: &Config,
1274 add_tubes: &mut Vec<TaggedControlTube>,
1275 vfio_path: &Path,
1276 add: bool,
1277) -> VmResponse {
1278 let ret = if add {
1279 add_vfio_device(linux, sys_allocator, cfg, add_tubes, vfio_path)
1280 } else {
1281 remove_vfio_device(linux, sys_allocator, vfio_path)
1282 };
1283
1284 match ret {
1285 Ok(()) => VmResponse::Ok,
1286 Err(e) => {
1287 error!("hanlde_vfio_command failure: {}", e);
1288 add_tubes.clear();
1289 VmResponse::Err(base::Error::new(libc::EINVAL))
1290 }
1291 }
1292}
1293
Zach Reiznerdc748482021-04-14 13:59:30 -07001294fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
1295 mut linux: RunnableLinuxVm<V, Vcpu>,
1296 mut sys_allocator: SystemAllocator,
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001297 cfg: Config,
Zach Reiznera60744b2019-02-13 17:33:32 -08001298 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001299 mut control_tubes: Vec<TaggedControlTube>,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001300 balloon_host_tube: Option<Tube>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001301 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07001302 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07001303 exit_evt: Event,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001304 reset_evt: Event,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001305 crash_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07001306 sigchld_fd: SignalFd,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001307 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Gurchetan Singh293913c2020-12-09 10:44:13 -08001308 mut gralloc: RutabagaGralloc,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001309 kvm_vcpu_ids: Vec<usize>,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001310) -> Result<ExitState> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001311 #[derive(PollToken)]
1312 enum Token {
1313 Exit,
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001314 Reset,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001315 Crash,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001316 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001317 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001318 IrqFd { index: IrqEventIndex },
Zach Reiznera60744b2019-02-13 17:33:32 -08001319 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001320 VmControl { index: usize },
1321 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001322
Zach Reizner19ad1f32019-12-12 18:58:50 -08001323 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001324 .set_raw_mode()
1325 .expect("failed to set terminal raw mode");
1326
Michael Hoylee392c462020-10-07 03:29:24 -07001327 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07001328 (&exit_evt, Token::Exit),
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001329 (&reset_evt, Token::Reset),
Andrew Walbran1a19c672022-01-24 17:24:10 +00001330 (&crash_evt, Token::Crash),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001331 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07001332 (&sigchld_fd, Token::ChildSignal),
1333 ])
Daniel Verkamp6b298582021-08-16 15:37:11 -07001334 .context("failed to add descriptor to wait context")?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07001335
Zach Reiznera60744b2019-02-13 17:33:32 -08001336 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07001337 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001338 .add(socket_server, Token::VmControlServer)
Daniel Verkamp6b298582021-08-16 15:37:11 -07001339 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001340 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001341 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07001342 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07001343 .add(socket.as_ref(), Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001344 .context("failed to add descriptor to wait context")?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001345 }
1346
Steven Richmanf32d0b42020-06-20 21:45:32 -07001347 let events = linux
1348 .irq_chip
1349 .irq_event_tokens()
Daniel Verkamp6b298582021-08-16 15:37:11 -07001350 .context("failed to add descriptor to wait context")?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001351
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001352 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07001353 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001354 .add(&evt, Token::IrqFd { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001355 .context("failed to add descriptor to wait context")?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001356 }
1357
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001358 if cfg.sandbox {
Lepton Wu20333e42019-03-14 10:48:03 -07001359 // Before starting VCPUs, in case we started with some capabilities, drop them all.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001360 drop_capabilities().context("failed to drop process capabilities")?;
Lepton Wu20333e42019-03-14 10:48:03 -07001361 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001362
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001363 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1364 // Create a channel for GDB thread.
1365 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
1366 let (s, r) = mpsc::channel();
1367 (Some(s), Some(r))
1368 } else {
1369 (None, None)
1370 };
1371
Steven Richmanf32d0b42020-06-20 21:45:32 -07001372 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
1373 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07001374 let use_hypervisor_signals = !linux
1375 .vm
1376 .get_hypervisor()
Andrew Walbran985491a2022-01-27 13:47:40 +00001377 .check_capability(HypervisorCap::ImmediateExit);
Anton Romanov5acc0f52022-01-28 00:18:11 +00001378 vcpu::setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07001379
Zach Reizner304e7312020-09-29 16:00:24 -07001380 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00001381 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001382 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
1383 };
Yusuke Sato31e136a2021-08-18 11:51:38 -07001384 // Enable core scheduling before creating vCPUs so that the cookie will be
1385 // shared by all vCPU threads.
1386 // TODO(b/199312402): Avoid enabling core scheduling for the crosvm process
1387 // itself for even better performance. Only vCPUs need the feature.
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001388 if cfg.per_vm_core_scheduling {
Yusuke Sato31e136a2021-08-18 11:51:38 -07001389 if let Err(e) = enable_core_scheduling() {
1390 error!("Failed to enable core scheduling: {}", e);
1391 }
1392 }
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001393 let vcpu_cgroup_tasks_file = match &cfg.vcpu_cgroup_path {
1394 None => None,
1395 Some(cgroup_path) => {
1396 // Move main process to cgroup_path
1397 let mut f = File::create(&cgroup_path.join("tasks"))?;
1398 f.write_all(process::id().to_string().as_bytes())?;
1399 Some(f)
1400 }
1401 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07001402 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07001403 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07001404 let vcpu_affinity = match linux.vcpu_affinity.clone() {
1405 Some(VcpuAffinity::Global(v)) => v,
1406 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
1407 None => Default::default(),
1408 };
Anton Romanov5acc0f52022-01-28 00:18:11 +00001409 let handle = vcpu::run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001410 cpu_id,
ZhaoLiu2aaf7ad2021-10-10 18:22:29 +08001411 kvm_vcpu_ids[cpu_id],
Zach Reizner55a9e502018-10-03 10:22:32 -07001412 vcpu,
Daniel Verkamp6b298582021-08-16 15:37:11 -07001413 linux.vm.try_clone().context("failed to clone vm")?,
1414 linux
1415 .irq_chip
1416 .try_box_clone()
1417 .context("failed to clone irqchip")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001418 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001419 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07001420 vcpu_affinity,
Suleiman Souhlal63630e82021-02-18 11:53:11 +09001421 linux.delay_rt,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001422 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07001423 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001424 linux.has_bios,
Colin Downs-Razouk11bed5e2021-11-02 09:33:14 -07001425 (*linux.io_bus).clone(),
1426 (*linux.mmio_bus).clone(),
Daniel Verkamp6b298582021-08-16 15:37:11 -07001427 exit_evt.try_clone().context("failed to clone event")?,
Andrew Walbranb28ae8e2022-01-17 14:33:10 +00001428 reset_evt.try_clone().context("failed to clone event")?,
Andrew Walbran1a19c672022-01-24 17:24:10 +00001429 crash_evt.try_clone().context("failed to clone event")?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001430 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07001431 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001432 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001433 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1434 to_gdb_channel.clone(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001435 cfg.per_vm_core_scheduling,
1436 cfg.host_cpu_topology,
Vineeth Pillai2b6855e2022-01-12 16:57:22 +00001437 match vcpu_cgroup_tasks_file {
1438 None => None,
1439 Some(ref f) => Some(
1440 f.try_clone()
1441 .context("failed to clone vcpu cgroup tasks file")?,
1442 ),
1443 },
Zach Reizner55a9e502018-10-03 10:22:32 -07001444 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07001445 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07001446 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001447
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001448 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1449 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001450 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001451 let to_vcpu_channels = vcpu_handles
1452 .iter()
1453 .map(|(_handle, channel)| channel.clone())
1454 .collect();
1455 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001456 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001457 to_vcpu_channels,
1458 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
1459 );
1460 thread::Builder::new()
1461 .name("gdb".to_owned())
1462 .spawn(move || gdb_thread(target, gdb_port_num))
Daniel Verkamp6b298582021-08-16 15:37:11 -07001463 .context("failed to spawn GDB thread")?;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001464 };
1465
Dylan Reid059a1882018-07-23 17:58:09 -07001466 vcpu_thread_barrier.wait();
1467
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001468 let mut exit_state = ExitState::Stop;
Charles William Dick54045012021-07-27 19:11:53 +09001469 let mut balloon_stats_id: u64 = 0;
1470
Michael Hoylee392c462020-10-07 03:29:24 -07001471 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001472 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07001473 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001474 Ok(v) => v,
1475 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001476 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001477 break;
1478 }
1479 }
1480 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001481
Steven Richmanf32d0b42020-06-20 21:45:32 -07001482 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
1483 warn!("can't deliver delayed irqs: {}", e);
1484 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001485
Zach Reiznera60744b2019-02-13 17:33:32 -08001486 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07001487 for event in events.iter().filter(|e| e.is_readable) {
1488 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001489 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001490 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07001491 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001492 }
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001493 Token::Reset => {
1494 info!("vcpu requested reset");
1495 exit_state = ExitState::Reset;
1496 break 'wait;
1497 }
Andrew Walbran1a19c672022-01-24 17:24:10 +00001498 Token::Crash => {
1499 info!("vcpu crashed");
1500 exit_state = ExitState::Crash;
1501 break 'wait;
1502 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001503 Token::Suspend => {
1504 info!("VM requested suspend");
1505 linux.suspend_evt.read().unwrap();
Anton Romanov5acc0f52022-01-28 00:18:11 +00001506 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001507 &vcpu_handles,
1508 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001509 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07001510 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001511 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001512 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001513 // Print all available siginfo structs, then exit the loop.
Daniel Verkamp6b298582021-08-16 15:37:11 -07001514 while let Some(siginfo) =
1515 sigchld_fd.read().context("failed to create signalfd")?
1516 {
Zach Reizner3ba00982019-01-23 19:04:43 -08001517 let pid = siginfo.ssi_pid;
1518 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1519 Some(label) => format!("{} (pid {})", label, pid),
1520 None => format!("pid {}", pid),
1521 };
David Tolnayf5032762018-12-03 10:46:45 -08001522 error!(
1523 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001524 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001525 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001526 }
Michael Hoylee392c462020-10-07 03:29:24 -07001527 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001528 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001529 Token::IrqFd { index } => {
1530 if let Err(e) = linux.irq_chip.service_irq_event(index) {
1531 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08001532 }
1533 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001534 Token::VmControlServer => {
1535 if let Some(socket_server) = &control_server_socket {
1536 match socket_server.accept() {
1537 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07001538 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08001539 .add(
1540 &socket,
1541 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001542 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08001543 },
1544 )
Daniel Verkamp6b298582021-08-16 15:37:11 -07001545 .context("failed to add descriptor to wait context")?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001546 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001547 }
1548 Err(e) => error!("failed to accept socket: {}", e),
1549 }
1550 }
1551 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001552 Token::VmControl { index } => {
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001553 let mut add_tubes = Vec::new();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001554 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001555 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001556 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001557 Ok(request) => {
1558 let mut run_mode_opt = None;
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001559 let response = match request {
1560 VmRequest::VfioCommand { vfio_path, add } => {
1561 handle_vfio_command(
1562 &mut linux,
1563 &mut sys_allocator,
1564 &cfg,
1565 &mut add_tubes,
1566 &vfio_path,
1567 add,
1568 )
1569 }
1570 _ => request.execute(
1571 &mut run_mode_opt,
Andrew Walbran3cd93602022-01-25 13:59:23 +00001572 balloon_host_tube.as_ref(),
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001573 &mut balloon_stats_id,
1574 disk_host_tubes,
1575 #[cfg(feature = "usb")]
1576 Some(&usb_control_tube),
1577 #[cfg(not(feature = "usb"))]
1578 None,
1579 &mut linux.bat_control,
1580 &vcpu_handles,
1581 ),
1582 };
1583
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001584 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001585 error!("failed to send VmResponse: {}", e);
1586 }
1587 if let Some(run_mode) = run_mode_opt {
1588 info!("control socket changed run mode to {}", run_mode);
1589 match run_mode {
1590 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07001591 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001592 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001593 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08001594 if other == VmRunMode::Running {
Daniel Verkampda4e8a92021-07-21 13:49:02 -07001595 for dev in &linux.resume_notify_devices {
1596 dev.lock().resume_imminent();
1597 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08001598 }
Anton Romanov5acc0f52022-01-28 00:18:11 +00001599 vcpu::kick_all_vcpus(
Steven Richman11dc6712020-09-02 15:39:14 -07001600 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07001601 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001602 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07001603 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001604 }
1605 }
1606 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001607 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001608 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001609 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001610 vm_control_indices_to_remove.push(index);
1611 } else {
1612 error!("failed to recv VmRequest: {}", e);
1613 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001614 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001615 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001616 TaggedControlTube::VmMemory(tube) => {
1617 match tube.recv::<VmMemoryRequest>() {
1618 Ok(request) => {
1619 let response = request.execute(
1620 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07001621 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001622 Arc::clone(&map_request),
1623 &mut gralloc,
1624 );
1625 if let Err(e) = tube.send(&response) {
1626 error!("failed to send VmMemoryControlResponse: {}", e);
1627 }
1628 }
1629 Err(e) => {
1630 if let TubeError::Disconnected = e {
1631 vm_control_indices_to_remove.push(index);
1632 } else {
1633 error!("failed to recv VmMemoryControlRequest: {}", e);
1634 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001635 }
1636 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001637 }
1638 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08001639 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001640 let response = {
1641 let irq_chip = &mut linux.irq_chip;
1642 request.execute(
1643 |setup| match setup {
1644 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07001645 if let Some(event_index) = irq_chip
1646 .register_irq_event(irq, ev, None)?
1647 {
1648 match wait_ctx.add(
1649 ev,
1650 Token::IrqFd {
1651 index: event_index
1652 },
1653 ) {
1654 Err(e) => {
1655 warn!("failed to add IrqFd to poll context: {}", e);
1656 Err(e)
1657 },
1658 Ok(_) => {
1659 Ok(())
1660 }
1661 }
1662 } else {
1663 Ok(())
1664 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001665 }
1666 IrqSetup::Route(route) => irq_chip.route_irq(route),
Xiong Zhang4fbc5542021-06-01 11:29:14 +08001667 IrqSetup::UnRegister(irq, ev) => irq_chip.unregister_irq_event(irq, ev),
Steven Richmanf32d0b42020-06-20 21:45:32 -07001668 },
Zach Reiznerdc748482021-04-14 13:59:30 -07001669 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001670 )
1671 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001672 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08001673 error!("failed to send VmIrqResponse: {}", e);
1674 }
1675 }
1676 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001677 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08001678 vm_control_indices_to_remove.push(index);
1679 } else {
1680 error!("failed to recv VmIrqRequest: {}", e);
1681 }
1682 }
1683 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001684 TaggedControlTube::VmMsync(tube) => {
1685 match tube.recv::<VmMsyncRequest>() {
1686 Ok(request) => {
1687 let response = request.execute(&mut linux.vm);
1688 if let Err(e) = tube.send(&response) {
1689 error!("failed to send VmMsyncResponse: {}", e);
1690 }
1691 }
1692 Err(e) => {
1693 if let TubeError::Disconnected = e {
1694 vm_control_indices_to_remove.push(index);
1695 } else {
1696 error!("failed to recv VmMsyncRequest: {}", e);
1697 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08001698 }
1699 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001700 }
1701 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001702 Ok(request) => {
1703 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07001704 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001705 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001706 error!("failed to send VmResponse: {}", e);
1707 }
1708 }
1709 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001710 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001711 vm_control_indices_to_remove.push(index);
1712 } else {
1713 error!("failed to recv VmResponse: {}", e);
1714 }
1715 }
1716 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001717 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001718 }
Xiong Zhangc78e72b2021-04-08 11:31:41 +08001719 if !add_tubes.is_empty() {
1720 for (idx, socket) in add_tubes.iter().enumerate() {
1721 wait_ctx
1722 .add(
1723 socket.as_ref(),
1724 Token::VmControl {
1725 index: idx + control_tubes.len(),
1726 },
1727 )
1728 .context(
1729 "failed to add hotplug vfio-pci descriptor ot wait context",
1730 )?;
1731 }
1732 control_tubes.append(&mut add_tubes);
1733 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001734 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001735 }
1736 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001737
Vikram Auradkarede68c72021-07-01 14:33:54 -07001738 // It's possible more data is readable and buffered while the socket is hungup,
1739 // so don't delete the tube from the poll context until we're sure all the
1740 // data is read.
1741 // Below case covers a condition where we have received a hungup event and the tube is not
1742 // readable.
1743 // In case of readable tube, once all data is read, any attempt to read more data on hungup
1744 // tube should fail. On such failure, we get Disconnected error and index gets added to
1745 // vm_control_indices_to_remove by the time we reach here.
1746 for event in events.iter().filter(|e| e.is_hungup && !e.is_readable) {
1747 if let Token::VmControl { index } = event.token {
1748 vm_control_indices_to_remove.push(index);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001749 }
1750 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001751
1752 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08001753 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07001754 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08001755 vm_control_indices_to_remove.dedup();
1756 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07001757 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
1758 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08001759 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07001760 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08001761 // that has already been closed. Because the token associated with that spurious event
1762 // now belongs to a different socket, the control loop will start to interact with
1763 // sockets that might not be ready to use. This can cause incorrect hangup detection or
1764 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001765 if let Some(socket) = control_tubes.get(index) {
Daniel Verkamp6b298582021-08-16 15:37:11 -07001766 wait_ctx
1767 .delete(socket)
1768 .context("failed to remove descriptor from wait context")?;
Zide Chen89584072019-11-14 10:33:51 -08001769 }
1770
1771 // This line implicitly drops the socket at `index` when it gets returned by
1772 // `swap_remove`. After this line, the socket at `index` is not the one from
1773 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07001774 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001775 control_tubes.swap_remove(index);
1776 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07001777 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001778 .modify(tube, EventType::Read, Token::VmControl { index })
Daniel Verkamp6b298582021-08-16 15:37:11 -07001779 .context("failed to add descriptor to wait context")?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001780 }
1781 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001782 }
1783
Anton Romanov5acc0f52022-01-28 00:18:11 +00001784 vcpu::kick_all_vcpus(
Zach Reiznerdc748482021-04-14 13:59:30 -07001785 &vcpu_handles,
1786 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08001787 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07001788 );
Steven Richman11dc6712020-09-02 15:39:14 -07001789 for (handle, _) in vcpu_handles {
1790 if let Err(e) = handle.join() {
1791 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001792 }
1793 }
1794
Daniel Verkamp94c35272019-09-12 13:31:30 -07001795 // Explicitly drop the VM structure here to allow the devices to clean up before the
1796 // control sockets are closed when this function exits.
1797 mem::drop(linux);
1798
Zach Reizner19ad1f32019-12-12 18:58:50 -08001799 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001800 .set_canon_mode()
1801 .expect("failed to restore canonical mode for terminal");
1802
Dmitry Torokhovf75699f2021-12-03 11:19:13 -08001803 Ok(exit_state)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001804}