blob: 77949e8c1348648c2300f76cdc048314c0142a5b [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Charles William Dick0e3d4b62020-12-14 12:16:46 +09005use std::cmp::{max, min, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Michael Hoylea596a072020-11-10 19:32:45 -080020use std::os::unix::io::FromRawFd;
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070025use std::sync::{mpsc, Arc, Barrier};
26
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use std::thread;
28use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090029use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080030
David Tolnay41a6f842019-03-01 16:18:44 -080031use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080032
Tomasz Jeznach42644642020-05-20 23:27:59 -070033use acpi_tables::sdt::SDT;
34
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090035use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080036use base::*;
Keiichi Watanabe60686582021-03-12 04:53:51 +090037use devices::virtio::vhost::user::{
Woody Chow5890b702021-02-12 14:57:02 +090038 Block as VhostUserBlock, Error as VhostUserError, Fs as VhostUserFs, Net as VhostUserNet,
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090039 Wl as VhostUserWl,
Keiichi Watanabe60686582021-03-12 04:53:51 +090040};
Zach Reizner65b98f12019-11-22 17:34:58 -080041#[cfg(feature = "gpu")]
42use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070043use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080044#[cfg(feature = "audio")]
45use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080046use devices::{
Daniel Verkampf1439d42021-05-21 13:55:10 -070047 self, IrqChip, IrqEventIndex, KvmKernelIrqChip, PciDevice, VcpuRunState, VfioContainer,
48 VfioDevice, VfioPciDevice, VirtioPciDevice,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080049};
Daniel Verkampf1439d42021-05-21 13:55:10 -070050#[cfg(feature = "usb")]
51use devices::{HostBackendDeviceProvider, XhciController};
Steven Richmanf32d0b42020-06-20 21:45:32 -070052use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner304e7312020-09-29 16:00:24 -070053use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070054use minijail::{self, Minijail};
David Tolnay2b089fc2019-03-04 15:33:22 -080055use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070056use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080057use resources::{Alloc, MmioType, SystemAllocator};
Gurchetan Singh293913c2020-12-09 10:44:13 -080058use rutabaga_gfx::RutabagaGralloc;
Dylan Reidb0492662019-05-17 14:50:13 -070059use sync::Mutex;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -080060use vm_control::*;
Sergey Senozhatskyd78d05b2021-04-13 20:59:58 +090061use vm_memory::{GuestAddress, GuestMemory, MemoryPolicy};
Zach Reizner39aa26b2017-12-12 18:03:23 -080062
Keiichi Watanabec5262e92020-10-21 15:57:33 +090063#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
64use crate::gdb::{gdb_thread, GdbStub};
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090065use crate::{
Woody Chow5890b702021-02-12 14:57:02 +090066 Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption, VhostUserFsOption,
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +090067 VhostUserOption, VhostUserWlOption,
Keiichi Watanabef3a37f42021-01-21 15:41:11 +090068};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070069use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070070 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
71 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070072};
Sonny Raoed517d12018-02-13 22:09:43 -080073
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080074#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070075use {
76 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070077 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070078 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
79};
Zach Reizner55a9e502018-10-03 10:22:32 -070080#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070081use {
Steven Richman11dc6712020-09-02 15:39:14 -070082 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
83 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070084 x86_64::X8664arch as Arch,
85};
Zach Reizner39aa26b2017-12-12 18:03:23 -080086
David Tolnay3df35522019-03-11 12:36:30 -070087#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070088#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080089pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070090 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070091 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070092 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -080093 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070094 AllocatePmemDeviceAddress(resources::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +090095 BalloonActualTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -080096 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -070097 BlockDeviceNew(base::Error),
98 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080099 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700100 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700101 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700102 CloneVcpu(base::Error),
103 ConfigureVcpu(<Arch as LinuxArch>::Error),
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900104 ConnectTube(io::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000105 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800106 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700107 CreateConsole(arch::serial::Error),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800108 CreateControlServer(io::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700109 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700110 CreateEvent(base::Error),
Gurchetan Singh293913c2020-12-09 10:44:13 -0800111 CreateGrallocError(rutabaga_gfx::RutabagaError),
Zach Reiznerdc748482021-04-14 13:59:30 -0700112 CreateGuestMemory(vm_memory::GuestMemoryError),
113 CreateIrqChip(base::Error),
Zach Reiznera90649a2021-03-31 12:56:08 -0700114 CreateKvm(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700115 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800116 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800117 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700118 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800119 CreateTpmStorage(PathBuf, io::Error),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800120 CreateTube(TubeError),
Daniel Verkampf1439d42021-05-21 13:55:10 -0700121 #[cfg(feature = "usb")]
Jingkui Wang100e6e42019-03-08 20:41:57 -0800122 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700123 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800124 CreateVfioDevice(devices::vfio::VfioError),
Zach Reiznera90649a2021-03-31 12:56:08 -0700125 CreateVm(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700126 CreateWaitContext(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700127 DeviceJail(minijail::Error),
128 DevicePivotRoot(minijail::Error),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800129 #[cfg(feature = "direct")]
Tomasz Jeznach3ce74762021-02-26 01:01:53 -0800130 DirectIo(io::Error),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800131 #[cfg(feature = "direct")]
132 DirectIrq(devices::DirectIrqError),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800133 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700134 DiskImageLock(base::Error),
135 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900136 FsDeviceNew(virtio::fs::Error),
137 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700138 GetSignalMask(signal::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900139 GuestCachedMissing(),
140 GuestCachedTooLarge(std::num::TryFromIntError),
141 GuestFreeMissing(),
142 GuestFreeTooLarge(std::num::TryFromIntError),
Zach Reiznera90649a2021-03-31 12:56:08 -0700143 GuestMemoryLayout(<Arch as LinuxArch>::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900144 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
145 HandleDebugCommand(<Arch as LinuxArch>::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800146 InputDeviceNew(virtio::InputError),
147 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700148 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700149 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700150 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800151 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800152 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800153 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700154 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800155 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700156 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800157 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800158 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800159 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800160 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900161 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800162 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700163 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700164 PmemDeviceNew(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900165 ReadMemAvailable(io::Error),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900166 ReadStatm(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700167 RegisterBalloon(arch::DeviceRegistrationError),
168 RegisterBlock(arch::DeviceRegistrationError),
169 RegisterGpu(arch::DeviceRegistrationError),
170 RegisterNet(arch::DeviceRegistrationError),
171 RegisterP9(arch::DeviceRegistrationError),
172 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700173 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700174 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700175 ReserveGpuMemory(base::MmapError),
176 ReserveMemory(base::Error),
177 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700178 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800179 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700180 RunnableVcpu(base::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900181 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
182 SendDebugStatus(Box<mpsc::SendError<VcpuDebugStatusMessage>>),
Allen Webbf3024c82020-06-19 07:19:48 -0700183 SettingGidMap(minijail::Error),
184 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700185 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700186 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700187 SignalFd(base::SignalFdError),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900188 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
189 SpawnGdbServer(io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800190 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700191 Timer(base::Error),
Michael Hoylea596a072020-11-10 19:32:45 -0800192 ValidateRawDescriptor(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800193 VhostNetDeviceNew(virtio::vhost::Error),
Keiichi Watanabe60686582021-03-12 04:53:51 +0900194 VhostUserBlockDeviceNew(VhostUserError),
Woody Chow5890b702021-02-12 14:57:02 +0900195 VhostUserFsDeviceNew(VhostUserError),
Keiichi Watanabe60686582021-03-12 04:53:51 +0900196 VhostUserNetDeviceNew(VhostUserError),
197 VhostUserNetWithNetArgs,
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900198 VhostUserWlDeviceNew(VhostUserError),
David Tolnay2b089fc2019-03-04 15:33:22 -0800199 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700200 VirtioPciDev(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700201 WaitContextAdd(base::Error),
202 WaitContextDelete(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700203 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800204}
205
David Tolnayc69f9752019-03-01 18:07:56 -0800206impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700207 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800208 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800209 use self::Error::*;
210
David Tolnay3df35522019-03-11 12:36:30 -0700211 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800212 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800213 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700214 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700215 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800216 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700217 AllocatePmemDeviceAddress(e) => {
218 write!(f, "failed to allocate memory for pmem device: {}", e)
219 }
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900220 BalloonActualTooLarge => write!(f, "balloon actual size is too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800221 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
222 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
223 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800224 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800225 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700226 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700227 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
228 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900229 ConnectTube(e) => write!(f, "failed to connect to tube: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000230 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800231 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700232 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800233 CreateControlServer(e) => write!(f, "failed to create control server: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700234 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700235 CreateEvent(e) => write!(f, "failed to create event: {}", e),
Gurchetan Singh293913c2020-12-09 10:44:13 -0800236 CreateGrallocError(e) => write!(f, "failed to create gralloc: {}", e),
Zach Reiznerdc748482021-04-14 13:59:30 -0700237 CreateGuestMemory(e) => write!(f, "failed to create guest memory: {}", e),
238 CreateIrqChip(e) => write!(f, "failed to create IRQ chip: {}", e),
Zach Reiznera90649a2021-03-31 12:56:08 -0700239 CreateKvm(e) => write!(f, "failed to create kvm: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800240 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
241 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
242 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700243 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800244 CreateTpmStorage(p, e) => {
245 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
246 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800247 CreateTube(e) => write!(f, "failed to create tube: {}", e),
Daniel Verkampf1439d42021-05-21 13:55:10 -0700248 #[cfg(feature = "usb")]
Jingkui Wang100e6e42019-03-08 20:41:57 -0800249 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700250 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800251 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
Zach Reiznera90649a2021-03-31 12:56:08 -0700252 CreateVm(e) => write!(f, "failed to create vm: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700253 CreateWaitContext(e) => write!(f, "failed to create wait context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800254 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
255 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800256 #[cfg(feature = "direct")]
Tomasz Jeznach3ce74762021-02-26 01:01:53 -0800257 DirectIo(e) => write!(f, "failed to open direct io device: {}", e),
Tomasz Jeznach7271f752021-03-04 01:44:06 -0800258 #[cfg(feature = "direct")]
259 DirectIrq(e) => write!(f, "failed to enable interrupt forwarding: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800260 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800261 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800262 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900263 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
264 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700265 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900266 GuestCachedMissing() => write!(f, "guest cached is missing from balloon stats"),
267 GuestCachedTooLarge(e) => write!(f, "guest cached is too large: {}", e),
268 GuestFreeMissing() => write!(f, "guest free is missing from balloon stats"),
269 GuestFreeTooLarge(e) => write!(f, "guest free is too large: {}", e),
Zach Reiznera90649a2021-03-31 12:56:08 -0700270 GuestMemoryLayout(e) => write!(f, "failed to create guest memory layout: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900271 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
272 HandleDebugCommand(e) => write!(f, "failed to handle a gdb command: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700273 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
274 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800275 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
276 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800277 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800278 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800279 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800280 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700281 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800282 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800283 f,
284 "failed to open android fstab file {}: {}",
285 p.display(),
286 e
287 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700288 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700289 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
290 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800291 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800292 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900293 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800294 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700295 PmemDeviceImageTooBig => {
296 write!(f, "failed to create pmem device: pmem device image too big")
297 }
298 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
Charles William Dick0e3d4b62020-12-14 12:16:46 +0900299 ReadMemAvailable(e) => write!(
300 f,
301 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
302 e
303 ),
304 ReadStatm(e) => write!(f, "failed to read /proc/self/statm: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800305 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
306 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
307 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
308 RegisterNet(e) => write!(f, "error registering net device: {}", e),
309 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
310 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
311 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
312 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800313 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
314 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700315 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700316 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800317 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700318 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900319 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
320 SendDebugStatus(e) => write!(f, "failed to send a debug status to GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800321 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900322 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700323 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800324 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
325 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900326 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
327 SpawnGdbServer(e) => write!(f, "failed to spawn GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800328 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700329 Timer(e) => write!(f, "failed to read timer fd: {}", e),
Michael Hoylea596a072020-11-10 19:32:45 -0800330 ValidateRawDescriptor(e) => write!(f, "failed to validate raw descriptor: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800331 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900332 VhostUserBlockDeviceNew(e) => {
333 write!(f, "failed to set up vhost-user block device: {}", e)
334 }
Tomasz Jeznachccb26942021-03-30 22:44:11 -0700335 VhostUserFsDeviceNew(e) => write!(f, "failed to set up vhost-user fs device: {}", e),
336 VhostUserNetDeviceNew(e) => write!(f, "failed to set up vhost-user net device: {}", e),
337 VhostUserNetWithNetArgs => write!(
338 f,
339 "vhost-user-net cannot be used with any of --host_ip, --netmask or --mac"
340 ),
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900341 VhostUserWlDeviceNew(e) => {
342 write!(f, "failed to set up vhost-user wl device: {}", e)
343 }
David Tolnayc69f9752019-03-01 18:07:56 -0800344 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
345 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700346 WaitContextAdd(e) => write!(f, "failed to add descriptor to wait context: {}", e),
347 WaitContextDelete(e) => {
348 write!(f, "failed to remove descriptor from wait context: {}", e)
349 }
David Tolnayc69f9752019-03-01 18:07:56 -0800350 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800351 }
352 }
353}
354
Allen Webbf3024c82020-06-19 07:19:48 -0700355impl From<minijail::Error> for Error {
356 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800357 Error::IoJail(err)
358 }
359}
360
David Tolnayc69f9752019-03-01 18:07:56 -0800361impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700362
Zach Reizner39aa26b2017-12-12 18:03:23 -0800363type Result<T> = std::result::Result<T, Error>;
364
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800365enum TaggedControlTube {
366 Fs(Tube),
367 Vm(Tube),
368 VmMemory(Tube),
369 VmIrq(Tube),
370 VmMsync(Tube),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700371}
372
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800373impl AsRef<Tube> for TaggedControlTube {
374 fn as_ref(&self) -> &Tube {
375 use self::TaggedControlTube::*;
Jakub Starond99cd0a2019-04-11 14:09:39 -0700376 match &self {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800377 Fs(tube) | Vm(tube) | VmMemory(tube) | VmIrq(tube) | VmMsync(tube) => tube,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700378 }
379 }
380}
381
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800382impl AsRawDescriptor for TaggedControlTube {
Michael Hoylee392c462020-10-07 03:29:24 -0700383 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800384 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700385 }
386}
387
Andrew Walbranf50bab62020-07-07 13:22:53 +0100388fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900389 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900390
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900391 // Safe because this will only modify `buf` and we check the return value.
392 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
393 if res == 0 {
394 // Safe because the kernel guarantees that the struct is fully initialized.
395 let limit = unsafe { buf.assume_init() };
396 Ok(limit.rlim_max)
397 } else {
398 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
399 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900400}
401
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When true, the jail is told to keep no capabilities (`use_caps(0)`).
    limit_caps: bool,
    /// When true, seccomp filter failures are logged and the `.policy` file is used even if a
    /// pre-compiled `.bpf` file exists.
    log_failures: bool,
    /// Path of the seccomp policy; its extension is replaced with `bpf` or `policy` before the
    /// file is loaded.
    seccomp_policy: &'a Path,
    /// Optional uid map string handed to the jail's `uidmap`.
    uid_map: Option<&'a str>,
    /// Optional gid map string handed to the jail's `gidmap`.
    gid_map: Option<&'a str>,
}
409
Zach Reizner44863792019-06-26 14:22:08 -0700410fn create_base_minijail(
411 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800412 r_limit: Option<u64>,
413 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700414) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800415 // All child jails run in a new user namespace without any users mapped,
416 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800417 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800418
419 if let Some(config) = config {
420 j.namespace_pids();
421 j.namespace_user();
422 j.namespace_user_disable_setgroups();
423 if config.limit_caps {
424 // Don't need any capabilities.
425 j.use_caps(0);
426 }
427 if let Some(uid_map) = config.uid_map {
428 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
429 }
430 if let Some(gid_map) = config.gid_map {
431 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
432 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900433 // Run in a new mount namespace.
434 j.namespace_vfs();
435
Matt Delcoc24ad782020-02-14 13:24:36 -0800436 // Run in an empty network namespace.
437 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900438
439 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800440 j.no_new_privs();
441
442 // By default we'll prioritize using the pre-compiled .bpf over the .policy
443 // file (the .bpf is expected to be compiled using "trap" as the failure
444 // behavior instead of the default "kill" behavior).
445 // Refer to the code comment for the "seccomp-log-failures"
446 // command-line parameter for an explanation about why the |log_failures|
447 // flag forces the use of .policy files (and the build-time alternative to
448 // this run-time flag).
449 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
450 if bpf_policy_file.exists() && !config.log_failures {
451 j.parse_seccomp_program(&bpf_policy_file)
452 .map_err(Error::DeviceJail)?;
453 } else {
454 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
455 // which will correctly kill the entire device process if a worker
456 // thread commits a seccomp violation.
457 j.set_seccomp_filter_tsync();
458 if config.log_failures {
459 j.log_seccomp_filter_failures();
460 }
461 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
462 .map_err(Error::DeviceJail)?;
463 }
464 j.use_seccomp_filter();
465 // Don't do init setup.
466 j.run_as_init();
467 }
468
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900469 // Only pivot_root if we are not re-using the current root directory.
470 if root != Path::new("/") {
471 // It's safe to call `namespace_vfs` multiple times.
472 j.namespace_vfs();
473 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
474 }
Matt Delco45caf912019-11-13 08:11:09 -0800475
Matt Delcoc24ad782020-02-14 13:24:36 -0800476 // Most devices don't need to open many fds.
477 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
478 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
479 .map_err(Error::SettingMaxOpenFiles)?;
480
Zach Reizner39aa26b2017-12-12 18:03:23 -0800481 Ok(j)
482}
483
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800484fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700485 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800486 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
487 // A directory for a jailed device's pivot root.
488 let root_path = Path::new(pivot_root);
489 if !root_path.exists() {
490 return Err(Error::PivotRootDoesntExist(pivot_root));
491 }
492 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800493 let config = SandboxConfig {
494 limit_caps: true,
495 log_failures: cfg.seccomp_log_failures,
496 seccomp_policy: &policy_path,
497 uid_map: None,
498 gid_map: None,
499 };
500 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800501 } else {
502 Ok(None)
503 }
504}
505
David Tolnayfd0971d2019-03-04 17:15:57 -0800506type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800507
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800508fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800509 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
510 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
511 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -0800512 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
David Tolnay2b089fc2019-03-04 15:33:22 -0800513 } else {
514 OpenOptions::new()
515 .read(true)
516 .write(!disk.read_only)
517 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800518 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800519 };
520 // Lock the disk image to prevent other crosvm instances from using it.
521 let lock_op = if disk.read_only {
522 FlockOperation::LockShared
523 } else {
524 FlockOperation::LockExclusive
525 };
526 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
527
Dylan Reid503c5ab2020-07-17 11:20:07 -0700528 let dev = if disk::async_ok(&raw_image).map_err(Error::CreateDiskError)? {
529 let async_file = disk::create_async_disk_file(raw_image).map_err(Error::CreateDiskError)?;
530 Box::new(
531 virtio::BlockAsync::new(
532 virtio::base_features(cfg.protected_vm),
533 async_file,
534 disk.read_only,
535 disk.sparse,
536 disk.block_size,
Daniel Verkampdd0ee592021-03-29 13:05:22 -0700537 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800538 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700539 )
540 .map_err(Error::BlockDeviceNew)?,
541 ) as Box<dyn VirtioDevice>
542 } else {
543 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
544 Box::new(
545 virtio::Block::new(
546 virtio::base_features(cfg.protected_vm),
547 disk_file,
548 disk.read_only,
549 disk.sparse,
550 disk.block_size,
551 disk.id,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800552 Some(disk_device_tube),
Dylan Reid503c5ab2020-07-17 11:20:07 -0700553 )
554 .map_err(Error::BlockDeviceNew)?,
555 ) as Box<dyn VirtioDevice>
556 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800557
558 Ok(VirtioDeviceStub {
Dylan Reid503c5ab2020-07-17 11:20:07 -0700559 dev,
Matt Delco45caf912019-11-13 08:11:09 -0800560 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800561 })
562}
563
Keiichi Watanabef3a37f42021-01-21 15:41:11 +0900564fn create_vhost_user_block_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
565 let dev = VhostUserBlock::new(virtio::base_features(cfg.protected_vm), &opt.socket)
566 .map_err(Error::VhostUserBlockDeviceNew)?;
567
568 Ok(VirtioDeviceStub {
569 dev: Box::new(dev),
570 // no sandbox here because virtqueue handling is exported to a different process.
571 jail: None,
572 })
573}
574
Woody Chow5890b702021-02-12 14:57:02 +0900575fn create_vhost_user_fs_device(cfg: &Config, option: &VhostUserFsOption) -> DeviceResult {
576 let dev = VhostUserFs::new(
577 virtio::base_features(cfg.protected_vm),
578 &option.socket,
579 &option.tag,
580 )
581 .map_err(Error::VhostUserFsDeviceNew)?;
582
583 Ok(VirtioDeviceStub {
584 dev: Box::new(dev),
585 // no sandbox here because virtqueue handling is exported to a different process.
586 jail: None,
587 })
588}
589
David Tolnay2b089fc2019-03-04 15:33:22 -0800590fn create_rng_device(cfg: &Config) -> DeviceResult {
Keiichi Watanabef70350b2020-11-24 21:57:53 +0900591 let dev =
592 virtio::Rng::new(virtio::base_features(cfg.protected_vm)).map_err(Error::RngDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800593
594 Ok(VirtioDeviceStub {
595 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800596 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800597 })
598}
599
/// Creates the virtio TPM device together with its storage directory.
///
/// With sandboxing enabled, storage lives in `/run/vm/tpm.<pid>`: the directory is created,
/// chowned to the ids returned by `add_crosvm_user_to_jail`, and bind-mounted into the jail.
/// Without a sandbox, `/tmp/tpm-simulator` is used and nothing is created here.
///
/// # Errors
///
/// Fails if the jail cannot be set up, or if the storage directory cannot be created or
/// chowned.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = &mut tpm_jail {
        // Create a tmpfs in the device's root directory for tpm
        // simulator storage. The size is 20*1024, or 20 KB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

        // Per-process storage directory, keyed by the current pid.
        let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
        let storage_dir = PathBuf::from(&tpm_pid_dir);
        fs::create_dir_all(&storage_dir)
            .map_err(|e| Error::CreateTpmStorage(storage_dir.clone(), e))?;
        let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
        chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid).map_err(Error::ChownTpmStorage)?;

        jail.mount_bind(&storage_dir, &storage_dir, true)?;
        storage_dir
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
647
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700648fn create_single_touch_device(
649 cfg: &Config,
650 single_touch_spec: &TouchDeviceOption,
651 idx: u32,
652) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800653 let socket = single_touch_spec
654 .get_path()
655 .into_unix_stream()
656 .map_err(|e| {
657 error!("failed configuring virtio single touch: {:?}", e);
658 e
659 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800660
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800661 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700662 let dev = virtio::new_single_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700663 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700664 socket,
665 width,
666 height,
667 virtio::base_features(cfg.protected_vm),
668 )
669 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800670 Ok(VirtioDeviceStub {
671 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800672 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800673 })
674}
675
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700676fn create_multi_touch_device(
677 cfg: &Config,
678 multi_touch_spec: &TouchDeviceOption,
679 idx: u32,
680) -> DeviceResult {
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000681 let socket = multi_touch_spec
682 .get_path()
683 .into_unix_stream()
684 .map_err(|e| {
685 error!("failed configuring virtio multi touch: {:?}", e);
686 e
687 })?;
688
689 let (width, height) = multi_touch_spec.get_size();
690 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700691 idx,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +0000692 socket,
693 width,
694 height,
695 virtio::base_features(cfg.protected_vm),
696 )
697 .map_err(Error::InputDeviceNew)?;
698
699 Ok(VirtioDeviceStub {
700 dev: Box::new(dev),
701 jail: simple_jail(&cfg, "input_device")?,
702 })
703}
704
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700705fn create_trackpad_device(
706 cfg: &Config,
707 trackpad_spec: &TouchDeviceOption,
708 idx: u32,
709) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800710 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800711 error!("failed configuring virtio trackpad: {}", e);
712 e
713 })?;
714
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800715 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700716 let dev = virtio::new_trackpad(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700717 idx,
Noah Goldd4ca29b2020-10-27 12:21:52 -0700718 socket,
719 width,
720 height,
721 virtio::base_features(cfg.protected_vm),
722 )
723 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800724
725 Ok(VirtioDeviceStub {
726 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800727 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800728 })
729}
730
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700731fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T, idx: u32) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800732 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800733 error!("failed configuring virtio mouse: {}", e);
734 e
735 })?;
736
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700737 let dev = virtio::new_mouse(idx, socket, virtio::base_features(cfg.protected_vm))
Noah Goldd4ca29b2020-10-27 12:21:52 -0700738 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800739
740 Ok(VirtioDeviceStub {
741 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800742 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800743 })
744}
745
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700746fn create_keyboard_device<T: IntoUnixStream>(
747 cfg: &Config,
748 keyboard_socket: T,
749 idx: u32,
750) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800751 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800752 error!("failed configuring virtio keyboard: {}", e);
753 e
754 })?;
755
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700756 let dev = virtio::new_keyboard(idx, socket, virtio::base_features(cfg.protected_vm))
Noah Goldd4ca29b2020-10-27 12:21:52 -0700757 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800758
759 Ok(VirtioDeviceStub {
760 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800761 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800762 })
763}
764
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700765fn create_switches_device<T: IntoUnixStream>(
766 cfg: &Config,
767 switches_socket: T,
768 idx: u32,
769) -> DeviceResult {
Daniel Norman5e23df72021-03-11 10:11:02 -0800770 let socket = switches_socket.into_unix_stream().map_err(|e| {
771 error!("failed configuring virtio switches: {}", e);
772 e
773 })?;
774
Jorge E. Moreira6635ca42021-04-28 13:11:41 -0700775 let dev = virtio::new_switches(idx, socket, virtio::base_features(cfg.protected_vm))
Daniel Norman5e23df72021-03-11 10:11:02 -0800776 .map_err(Error::InputDeviceNew)?;
777
778 Ok(VirtioDeviceStub {
779 dev: Box::new(dev),
780 jail: simple_jail(&cfg, "input_device")?,
781 })
782}
783
David Tolnay2b089fc2019-03-04 15:33:22 -0800784fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
785 let dev_file = OpenOptions::new()
786 .read(true)
787 .write(true)
788 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800789 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800790
Noah Goldd4ca29b2020-10-27 12:21:52 -0700791 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
792 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800793
794 Ok(VirtioDeviceStub {
795 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800796 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800797 })
798}
799
Zach Reiznerd49bcdb2021-01-07 08:30:28 -0800800fn create_balloon_device(cfg: &Config, tube: Tube) -> DeviceResult {
801 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), tube)
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100802 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800803
804 Ok(VirtioDeviceStub {
805 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800806 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800807 })
808}
809
Michael Hoylea596a072020-11-10 19:32:45 -0800810fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800811 // Safe because we ensure that we get a unique handle to the fd.
812 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800813 Tap::from_raw_descriptor(
814 validate_raw_descriptor(tap_fd).map_err(Error::ValidateRawDescriptor)?,
815 )
816 .map_err(Error::CreateTapDevice)?
David Tolnay2b089fc2019-03-04 15:33:22 -0800817 };
818
Xiong Zhang773c7072020-03-20 10:39:55 +0800819 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
820 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700821 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800822 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
823 vq_pairs = 1;
824 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100825 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100826 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800827
828 Ok(VirtioDeviceStub {
829 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800830 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800831 })
832}
833
834fn create_net_device(
835 cfg: &Config,
836 host_ip: Ipv4Addr,
837 netmask: Ipv4Addr,
838 mac_address: MacAddress,
839 mem: &GuestMemory,
840) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800841 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
842 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700843 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800844 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
845 vq_pairs = 1;
846 }
847
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100848 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800849 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100850 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
Christian Blichmann2f5d4b62021-03-10 18:08:08 +0100851 &cfg.vhost_net_device_path,
Will Deacon81d5adb2020-10-06 18:37:48 +0100852 features,
853 host_ip,
854 netmask,
855 mac_address,
856 mem,
857 )
858 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800859 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800860 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100861 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800862 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800863 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800864 };
865
866 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800867 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800868 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800869 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800870 };
871
872 Ok(VirtioDeviceStub {
873 dev,
874 jail: simple_jail(&cfg, policy)?,
875 })
876}
877
Keiichi Watanabe60686582021-03-12 04:53:51 +0900878fn create_vhost_user_net_device(cfg: &Config, opt: &VhostUserOption) -> DeviceResult {
879 let dev = VhostUserNet::new(virtio::base_features(cfg.protected_vm), &opt.socket)
880 .map_err(Error::VhostUserNetDeviceNew)?;
881
882 Ok(VirtioDeviceStub {
883 dev: Box::new(dev),
884 // no sandbox here because virtqueue handling is exported to a different process.
885 jail: None,
886 })
887}
888
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +0900889fn create_vhost_user_wl_device(cfg: &Config, opt: &VhostUserWlOption) -> DeviceResult {
890 // The crosvm wl device expects us to connect the tube before it will accept a vhost-user
891 // connection.
892 let dev = VhostUserWl::new(virtio::base_features(cfg.protected_vm), &opt.socket)
893 .map_err(Error::VhostUserWlDeviceNew)?;
894
895 Ok(VirtioDeviceStub {
896 dev: Box::new(dev),
897 // no sandbox here because virtqueue handling is exported to a different process.
898 jail: None,
899 })
900}
901
/// Creates the virtio GPU device and, when sandboxing is enabled, the jail it runs in.
///
/// The display backend list is Wayland (only when `wayland_socket_path` is given), then X
/// (with `x_display`), then the stub backend. When a jail is created, a tmpfs root is mounted
/// and the device nodes, library directories, shader cache, wayland socket directories,
/// `/proc` and the perfetto directory are mounted into it; most of these are skipped when
/// the path does not exist on the host.
///
/// Shader-cache settings are applied by setting `MESA_GLSL_CACHE_*` environment variables of
/// the current process, and only when a jail is being built.
///
/// # Errors
///
/// Returns `Error::InvalidWaylandPath` if any configured wayland socket path has no parent
/// directory, `Error::CloneEvent` if `exit_evt` cannot be cloned, and propagates jail and
/// mount failures.
///
/// # Panics
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_tube: Tube,
    resource_bridges: Vec<Tube>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
    mem: &GuestMemory,
) -> DeviceResult {
    let mut display_backends = vec![
        virtio::DisplayBackend::X(x_display),
        virtio::DisplayBackend::Stub,
    ];

    // Parent directory of every configured wayland socket; bind-mounted into the jail below.
    let wayland_socket_dirs = cfg
        .wayland_socket_paths
        .iter()
        .map(|(_name, socket)| socket.parent())
        .collect::<Option<Vec<_>>>()
        .ok_or(Error::InvalidWaylandPath)?;

    // A wayland backend, when requested, goes in front of the X and stub backends.
    if let Some(socket_path) = wayland_socket_path {
        let wayland_backend = virtio::DisplayBackend::Wayland(Some(socket_path.to_owned()));
        display_backends.insert(0, wayland_backend);
    }

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEvent)?,
        Some(gpu_device_tube),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        resource_bridges,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
        cfg.wayland_socket_paths.clone(),
        mem.clone(),
    );

    let mut jail = simple_jail(cfg, "gpu_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sys_dir in &["/sys/dev/char", "/sys/devices"] {
            let sys_path = Path::new(sys_dir);
            jail.mount_bind(sys_path, sys_path, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        let gpu_params = cfg.gpu_parameters.as_ref();
        if let Some(cache_dir) = gpu_params.and_then(|params| params.cache_path.as_ref()) {
            if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) = gpu_params.and_then(|params| params.cache_size.as_ref())
                {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_path = Path::new(cache_dir);
                jail.mount_bind(shadercache_path, shadercache_path, true)?;
            }
        }

        // Optional device nodes, bind mounted writable when present on the host: the ARM
        // specific devices (mali0, pvr_sync) and the udmabuf driver.
        for node in &["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
            let node_path = Path::new(node);
            if node_path.exists() {
                jail.mount_bind(node_path, node_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        let lib_dirs = &[
            "/usr/lib",
            "/usr/lib64",
            "/lib",
            "/lib64",
            "/usr/share/vulkan",
        ];
        for lib_dir in lib_dirs {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                jail.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket's directory into jail's root. This is necessary since
        // each new wayland context must open() the socket. If the wayland socket is ever
        // destroyed and remade in the same host directory, new connections will be possible
        // without restarting the wayland device.
        for socket_dir in &wayland_socket_dirs {
            jail.mount_bind(socket_dir, socket_dir, true)?;
        }

        add_crosvm_user_to_jail(jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        jail.mount(
            proc_path,
            proc_path,
            "proc",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
        )?;

        // To enable perfetto tracing, we need to give access to the perfetto service IPC
        // endpoints.
        let perfetto_path = Path::new("/run/perfetto");
        if perfetto_path.exists() {
            jail.mount_bind(perfetto_path, perfetto_path, true)?;
        }
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
1063
1064fn create_wayland_device(
1065 cfg: &Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001066 control_tube: Tube,
1067 resource_bridge: Option<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001068) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001069 let wayland_socket_dirs = cfg
1070 .wayland_socket_paths
1071 .iter()
1072 .map(|(_name, path)| path.parent())
1073 .collect::<Option<Vec<_>>>()
1074 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001075
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001076 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001077 let dev = virtio::Wl::new(
1078 features,
1079 cfg.wayland_socket_paths.clone(),
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001080 control_tube,
Will Deacon81d5adb2020-10-06 18:37:48 +01001081 resource_bridge,
1082 )
1083 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001084
Matt Delco45caf912019-11-13 08:11:09 -08001085 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -08001086 Some(mut jail) => {
1087 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
1088 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
1089 jail.mount_with_data(
1090 Path::new("none"),
1091 Path::new("/"),
1092 "tmpfs",
1093 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
1094 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -08001095 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001096
1097 // Bind mount the wayland socket's directory into jail's root. This is necessary since
1098 // each new wayland context must open() the socket. If the wayland socket is ever
1099 // destroyed and remade in the same host directory, new connections will be possible
1100 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001101 for dir in &wayland_socket_dirs {
1102 jail.mount_bind(dir, dir, true)?;
1103 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001104 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
1105
1106 Some(jail)
1107 }
1108 None => None,
1109 };
1110
1111 Ok(VirtioDeviceStub {
1112 dev: Box::new(dev),
1113 jail,
1114 })
1115}
1116
/// Creates a virtio video device of kind `typ` and, when sandboxing is enabled, its jail.
///
/// The jail registers the `video-decoder` or `video-encoder` crosvm user (depending on
/// `typ`), mounts a tmpfs root, and bind mounts the render node, `/dev/urandom` and the
/// D-Bus system bus socket. On x86/x86_64 it also bind mounts sysfs device directories and
/// `/usr/lib64`.
///
/// # Errors
///
/// Propagates jail creation and mount failures.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: Tube,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = jail.as_mut() {
        let feature_name = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, feature_name)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices.
            let sys_dev_char = Path::new("/sys/dev/char");
            jail.mount_bind(sys_dev_char, sys_dev_char, false)?;
            let sys_devices = Path::new("/sys/devices");
            jail.mount_bind(sys_devices, sys_devices, false)?;

            // Required for loading dri libraries loaded by minigbm on AMD devices.
            let lib64 = Path::new("/usr/lib64");
            jail.mount_bind(lib64, lib64, false)?;
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
1180
/// Creates a video device of kind `typ` using `video_tube` and appends it to `devs`.
///
/// `devs` is left untouched if device creation fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    video_tube: Tube,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let stub = create_video_device(cfg, typ, video_tube)?;
    devs.push(stub);
    Ok(())
}
1191
David Tolnay2b089fc2019-03-04 15:33:22 -08001192fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001193 let features = virtio::base_features(cfg.protected_vm);
Christian Blichmann2f5d4b62021-03-10 18:08:08 +01001194 let dev = virtio::vhost::Vsock::new(&cfg.vhost_vsock_device_path, features, cid, mem)
1195 .map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001196
1197 Ok(VirtioDeviceStub {
1198 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001199 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001200 })
1201}
1202
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001203fn create_fs_device(
1204 cfg: &Config,
1205 uid_map: &str,
1206 gid_map: &str,
1207 src: &Path,
1208 tag: &str,
1209 fs_cfg: virtio::fs::passthrough::Config,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001210 device_tube: Tube,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001211) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001212 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001213 let j = if cfg.sandbox {
1214 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1215 let config = SandboxConfig {
1216 limit_caps: false,
1217 uid_map: Some(uid_map),
1218 gid_map: Some(gid_map),
1219 log_failures: cfg.seccomp_log_failures,
1220 seccomp_policy: &seccomp_policy,
1221 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001222 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1223 // We want bind mounts from the parent namespaces to propagate into the fs device's
1224 // namespace.
1225 jail.set_remount_mode(libc::MS_SLAVE);
1226
1227 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001228 } else {
1229 create_base_minijail(src, Some(max_open_files), None)?
1230 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001231
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001232 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001233 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1234 // when num_queues > 1.
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001235 let dev =
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001236 virtio::fs::Fs::new(features, tag, 1, fs_cfg, device_tube).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001237
1238 Ok(VirtioDeviceStub {
1239 dev: Box::new(dev),
1240 jail: Some(j),
1241 })
1242}
1243
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001244fn create_9p_device(
1245 cfg: &Config,
1246 uid_map: &str,
1247 gid_map: &str,
1248 src: &Path,
1249 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001250 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001251) -> DeviceResult {
1252 let max_open_files = get_max_open_files()?;
1253 let (jail, root) = if cfg.sandbox {
1254 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1255 let config = SandboxConfig {
1256 limit_caps: false,
1257 uid_map: Some(uid_map),
1258 gid_map: Some(gid_map),
1259 log_failures: cfg.seccomp_log_failures,
1260 seccomp_policy: &seccomp_policy,
1261 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001262
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001263 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1264 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1265 // namespace.
1266 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001267
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001268 // The shared directory becomes the root of the device's file system.
1269 let root = Path::new("/");
1270 (Some(jail), root)
1271 } else {
1272 // There's no mount namespace so we tell the server to treat the source directory as the
1273 // root.
1274 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001275 };
1276
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001277 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001278 p9_cfg.root = root.into();
1279 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001280
1281 Ok(VirtioDeviceStub {
1282 dev: Box::new(dev),
1283 jail,
1284 })
1285}
1286
Jakub Starona3411ea2019-04-24 10:55:25 -07001287fn create_pmem_device(
1288 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001289 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001290 resources: &mut SystemAllocator,
1291 disk: &DiskOption,
1292 index: usize,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001293 pmem_device_tube: Tube,
Jakub Starona3411ea2019-04-24 10:55:25 -07001294) -> DeviceResult {
Mike Gerowec618a52021-02-26 20:57:14 +00001295 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
1296 let fd: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
1297 // Safe because we will validate |raw_fd|.
1298 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
1299 } else {
1300 OpenOptions::new()
1301 .read(true)
1302 .write(!disk.read_only)
1303 .open(&disk.path)
1304 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
1305 };
Jakub Starona3411ea2019-04-24 10:55:25 -07001306
Iliyan Malcheved149862020-04-17 23:57:47 +00001307 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001308 let metadata =
1309 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001310 let disk_len = metadata.len();
1311 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1312 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1313 // we just align the size of the file to 2 MiB then access beyond the last page of the
1314 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1315 // padding up to 2 MiB.
1316 let alignment = 2 * 1024 * 1024;
1317 let align_adjust = if disk_len % alignment != 0 {
1318 alignment - (disk_len % alignment)
1319 } else {
1320 0
1321 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001322 disk_len
1323 .checked_add(align_adjust)
1324 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001325 };
1326
1327 let protection = {
1328 if disk.read_only {
1329 Protection::read()
1330 } else {
1331 Protection::read_write()
1332 }
1333 };
1334
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001335 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001336 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001337 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001338
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001339 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1340 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001341 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001342 .map_err(Error::ReservePmemMemory)?;
1343 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001344 };
1345
1346 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001347 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001348 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001349 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001350 Alloc::PmemDevice(index),
1351 format!("pmem_disk_image_{}", index),
1352 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1353 128 * 1024 * 1024, /* 128 MiB */
1354 )
1355 .map_err(Error::AllocatePmemDeviceAddress)?;
1356
Daniel Verkampe1980a92020-02-07 11:00:55 -08001357 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001358 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001359 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001360 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001361 /* read_only = */ disk.read_only,
1362 /* log_dirty_pages = */ false,
1363 )
1364 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001365
Daniel Verkampe1980a92020-02-07 11:00:55 -08001366 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001367 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001368 fd,
1369 GuestAddress(mapping_address),
1370 slot,
1371 arena_size,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001372 Some(pmem_device_tube),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001373 )
1374 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001375
1376 Ok(VirtioDeviceStub {
1377 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001378 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001379 })
1380}
1381
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001382fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001383 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001384 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001385 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001386 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001387 .map_err(Error::CreateConsole)?;
1388
Nicholas Verne71e73d82020-07-08 17:19:55 +10001389 let jail = match simple_jail(&cfg, "serial")? {
1390 Some(mut jail) => {
1391 // Create a tmpfs in the device's root directory so that we can bind mount the
1392 // log socket directory into it.
1393 // The size=67108864 is size=64*1024*1024 or size=64MB.
1394 jail.mount_with_data(
1395 Path::new("none"),
1396 Path::new("/"),
1397 "tmpfs",
1398 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1399 "size=67108864",
1400 )?;
1401 add_crosvm_user_to_jail(&mut jail, "serial")?;
1402 let res = param.add_bind_mounts(&mut jail);
1403 if res.is_err() {
1404 error!("failed to add bind mounts for console device");
1405 }
1406 Some(jail)
1407 }
1408 None => None,
1409 };
1410
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001411 Ok(VirtioDeviceStub {
1412 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001413 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001414 })
1415}
1416
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001417// gpu_device_tube is not used when GPU support is disabled.
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001418#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001419fn create_virtio_devices(
1420 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001421 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001422 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001423 _exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001424 wayland_device_tube: Tube,
1425 gpu_device_tube: Tube,
1426 balloon_device_tube: Tube,
1427 disk_device_tubes: &mut Vec<Tube>,
1428 pmem_device_tubes: &mut Vec<Tube>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001429 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001430 fs_device_tubes: &mut Vec<Tube>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001431) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001432 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001433
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001434 for (_, param) in cfg
1435 .serial_parameters
1436 .iter()
1437 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1438 {
1439 let dev = create_console_device(cfg, param)?;
1440 devs.push(dev);
1441 }
1442
Zach Reizner8fb52112017-12-13 16:04:39 -08001443 for disk in &cfg.disks {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001444 let disk_device_tube = disk_device_tubes.remove(0);
1445 devs.push(create_block_device(cfg, disk, disk_device_tube)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001446 }
1447
Keiichi Watanabef3a37f42021-01-21 15:41:11 +09001448 for blk in &cfg.vhost_user_blk {
1449 devs.push(create_vhost_user_block_device(cfg, blk)?);
1450 }
1451
Jakub Starona3411ea2019-04-24 10:55:25 -07001452 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001453 let pmem_device_tube = pmem_device_tubes.remove(0);
Daniel Verkampe1980a92020-02-07 11:00:55 -08001454 devs.push(create_pmem_device(
1455 cfg,
1456 vm,
1457 resources,
1458 pmem_disk,
1459 index,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001460 pmem_device_tube,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001461 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001462 }
1463
David Tolnay2b089fc2019-03-04 15:33:22 -08001464 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001465
David Tolnayde6b29a2018-12-20 11:49:46 -08001466 #[cfg(feature = "tpm")]
1467 {
David Tolnay43f8e212019-02-13 17:28:16 -08001468 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001469 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001470 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001471 }
1472
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001473 for (idx, single_touch_spec) in cfg.virtio_single_touch.iter().enumerate() {
1474 devs.push(create_single_touch_device(
1475 cfg,
1476 single_touch_spec,
1477 idx as u32,
1478 )?);
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001479 }
1480
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001481 for (idx, multi_touch_spec) in cfg.virtio_multi_touch.iter().enumerate() {
1482 devs.push(create_multi_touch_device(
1483 cfg,
1484 multi_touch_spec,
1485 idx as u32,
1486 )?);
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001487 }
1488
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001489 for (idx, trackpad_spec) in cfg.virtio_trackpad.iter().enumerate() {
1490 devs.push(create_trackpad_device(cfg, trackpad_spec, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001491 }
1492
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001493 for (idx, mouse_socket) in cfg.virtio_mice.iter().enumerate() {
1494 devs.push(create_mouse_device(cfg, mouse_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001495 }
1496
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001497 for (idx, keyboard_socket) in cfg.virtio_keyboard.iter().enumerate() {
1498 devs.push(create_keyboard_device(cfg, keyboard_socket, idx as u32)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001499 }
1500
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001501 for (idx, switches_socket) in cfg.virtio_switches.iter().enumerate() {
1502 devs.push(create_switches_device(cfg, switches_socket, idx as u32)?);
Daniel Norman5e23df72021-03-11 10:11:02 -08001503 }
1504
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001505 for dev_path in &cfg.virtio_input_evdevs {
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001506 devs.push(create_vinput_device(cfg, &dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001507 }
1508
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001509 devs.push(create_balloon_device(cfg, balloon_device_tube)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001510
Zach Reizner39aa26b2017-12-12 18:03:23 -08001511 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001512 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001513 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001514 }
1515
David Tolnay2b089fc2019-03-04 15:33:22 -08001516 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1517 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1518 {
Keiichi Watanabe60686582021-03-12 04:53:51 +09001519 if !cfg.vhost_user_net.is_empty() {
1520 return Err(Error::VhostUserNetWithNetArgs);
1521 }
Zach Reiznerdc748482021-04-14 13:59:30 -07001522 devs.push(create_net_device(
1523 cfg,
1524 host_ip,
1525 netmask,
1526 mac_address,
1527 vm.get_memory(),
1528 )?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001529 }
1530
Keiichi Watanabe60686582021-03-12 04:53:51 +09001531 for net in &cfg.vhost_user_net {
1532 devs.push(create_vhost_user_net_device(cfg, net)?);
1533 }
1534
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09001535 for opt in &cfg.vhost_user_wl {
1536 devs.push(create_vhost_user_wl_device(cfg, opt)?);
1537 }
1538
David Tolnayfa701712019-02-13 16:42:54 -08001539 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001540 let mut resource_bridges = Vec::<Tube>::new();
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001541
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001542 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001543 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001544 let mut wl_resource_bridge = None::<Tube>;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001545
1546 #[cfg(feature = "gpu")]
1547 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001548 if cfg.gpu_parameters.is_some() {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001549 let (wl_socket, gpu_socket) = Tube::pair().map_err(Error::CreateTube)?;
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001550 resource_bridges.push(gpu_socket);
1551 wl_resource_bridge = Some(wl_socket);
1552 }
1553 }
1554
1555 devs.push(create_wayland_device(
1556 cfg,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001557 wayland_device_tube,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001558 wl_resource_bridge,
1559 )?);
1560 }
David Tolnayfa701712019-02-13 16:42:54 -08001561
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001562 #[cfg(feature = "video-decoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001563 let video_dec_tube = if cfg.video_dec {
1564 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1565 resource_bridges.push(gpu_tube);
1566 Some(video_tube)
1567 } else {
1568 None
1569 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001570
1571 #[cfg(feature = "video-encoder")]
Daniel Verkampffb59122021-03-18 14:06:15 -07001572 let video_enc_tube = if cfg.video_enc {
1573 let (video_tube, gpu_tube) = Tube::pair().map_err(Error::CreateTube)?;
1574 resource_bridges.push(gpu_tube);
1575 Some(video_tube)
1576 } else {
1577 None
1578 };
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001579
Zach Reizner3a8100a2017-09-13 19:15:43 -07001580 #[cfg(feature = "gpu")]
1581 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001582 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Zach Reizner65b98f12019-11-22 17:34:58 -08001583 let mut event_devices = Vec::new();
1584 if cfg.display_window_mouse {
1585 let (event_device_socket, virtio_dev_socket) =
1586 UnixStream::pair().map_err(Error::CreateSocket)?;
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001587 let (multi_touch_width, multi_touch_height) = cfg
1588 .virtio_multi_touch
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001589 .first()
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001590 .as_ref()
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001591 .map(|multi_touch_spec| multi_touch_spec.get_size())
Noah Golddc7f52b2020-02-01 13:01:58 -08001592 .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001593 let dev = virtio::new_multi_touch(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001594 // u32::MAX is the least likely to collide with the indices generated above for
1595 // the multi_touch options, which begin at 0.
1596 u32::MAX,
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001597 virtio_dev_socket,
Tristan Muntsinger486cffc2020-09-29 22:05:41 +00001598 multi_touch_width,
1599 multi_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001600 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001601 )
1602 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001603 devs.push(VirtioDeviceStub {
1604 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001605 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001606 });
1607 event_devices.push(EventDevice::touchscreen(event_device_socket));
1608 }
1609 if cfg.display_window_keyboard {
1610 let (event_device_socket, virtio_dev_socket) =
1611 UnixStream::pair().map_err(Error::CreateSocket)?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001612 let dev = virtio::new_keyboard(
Jorge E. Moreira6635ca42021-04-28 13:11:41 -07001613 // u32::MAX is the least likely to collide with the indices generated above for
1614 // the multi_touch options, which begin at 0.
1615 u32::MAX,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001616 virtio_dev_socket,
1617 virtio::base_features(cfg.protected_vm),
1618 )
1619 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001620 devs.push(VirtioDeviceStub {
1621 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001622 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001623 });
1624 event_devices.push(EventDevice::keyboard(event_device_socket));
1625 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001626 devs.push(create_gpu_device(
1627 cfg,
1628 _exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001629 gpu_device_tube,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001630 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001631 // Use the unnamed socket for GPU display screens.
1632 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001633 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001634 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001635 map_request,
Zach Reiznerdc748482021-04-14 13:59:30 -07001636 vm.get_memory(),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001637 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001638 }
1639 }
1640
Daniel Verkampffb59122021-03-18 14:06:15 -07001641 #[cfg(feature = "video-decoder")]
1642 {
1643 if let Some(video_dec_tube) = video_dec_tube {
1644 register_video_device(
1645 &mut devs,
1646 video_dec_tube,
1647 cfg,
1648 devices::virtio::VideoDeviceType::Decoder,
1649 )?;
1650 }
1651 }
1652
1653 #[cfg(feature = "video-encoder")]
1654 {
1655 if let Some(video_enc_tube) = video_enc_tube {
1656 register_video_device(
1657 &mut devs,
1658 video_enc_tube,
1659 cfg,
1660 devices::virtio::VideoDeviceType::Encoder,
1661 )?;
1662 }
1663 }
1664
Zach Reizneraa575662018-08-15 10:46:32 -07001665 if let Some(cid) = cfg.cid {
Zach Reiznerdc748482021-04-14 13:59:30 -07001666 devs.push(create_vhost_vsock_device(cfg, cid, vm.get_memory())?);
Zach Reizneraa575662018-08-15 10:46:32 -07001667 }
1668
Woody Chow5890b702021-02-12 14:57:02 +09001669 for vhost_user_fs in &cfg.vhost_user_fs {
1670 devs.push(create_vhost_user_fs_device(cfg, &vhost_user_fs)?);
1671 }
1672
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001673 for shared_dir in &cfg.shared_dirs {
1674 let SharedDir {
1675 src,
1676 tag,
1677 kind,
1678 uid_map,
1679 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001680 fs_cfg,
1681 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001682 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001683
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001684 let dev = match kind {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001685 SharedDirKind::FS => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001686 let device_tube = fs_device_tubes.remove(0);
1687 create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone(), device_tube)?
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09001688 }
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001689 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001690 };
1691 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001692 }
1693
1694 Ok(devs)
1695}
1696
1697fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001698 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001699 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001700 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001701 exit_evt: &Event,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001702 control_tubes: &mut Vec<TaggedControlTube>,
1703 wayland_device_tube: Tube,
1704 gpu_device_tube: Tube,
1705 balloon_device_tube: Tube,
1706 disk_device_tubes: &mut Vec<Tube>,
1707 pmem_device_tubes: &mut Vec<Tube>,
1708 fs_device_tubes: &mut Vec<Tube>,
Daniel Verkampf1439d42021-05-21 13:55:10 -07001709 #[cfg(feature = "usb")] usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001710 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001711) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001712 let stubs = create_virtio_devices(
1713 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001714 vm,
1715 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001716 exit_evt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001717 wayland_device_tube,
1718 gpu_device_tube,
1719 balloon_device_tube,
1720 disk_device_tubes,
1721 pmem_device_tubes,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001722 map_request,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001723 fs_device_tubes,
David Tolnay2b089fc2019-03-04 15:33:22 -08001724 )?;
1725
1726 let mut pci_devices = Vec::new();
1727
1728 for stub in stubs {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001729 let (msi_host_tube, msi_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
1730 control_tubes.push(TaggedControlTube::VmIrq(msi_host_tube));
Zach Reiznerdc748482021-04-14 13:59:30 -07001731 let dev = VirtioPciDevice::new(vm.get_memory().clone(), stub.dev, msi_device_tube)
Daniel Verkampbb712d62019-11-19 09:47:33 -08001732 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001733 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001734 pci_devices.push((dev, stub.jail));
1735 }
1736
Andrew Scull1590e6f2020-03-18 18:00:47 +00001737 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001738 for ac97_param in &cfg.ac97_parameters {
Zach Reiznerdc748482021-04-14 13:59:30 -07001739 let dev = Ac97Dev::try_new(vm.get_memory().clone(), ac97_param.clone())
1740 .map_err(Error::CreateAc97)?;
paulhsiace17e6e2020-08-28 18:37:45 +08001741 let jail = simple_jail(&cfg, dev.minijail_policy())?;
1742 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001743 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001744
Daniel Verkampf1439d42021-05-21 13:55:10 -07001745 #[cfg(feature = "usb")]
1746 {
1747 // Create xhci controller.
1748 let usb_controller = Box::new(XhciController::new(vm.get_memory().clone(), usb_provider));
1749 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
1750 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001751
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001752 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001753 let vfio_container = Arc::new(Mutex::new(
1754 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1755 ));
1756
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001757 for vfio_path in &cfg.vfio {
Daniel Verkamp10154a92020-09-28 17:44:40 -07001758 // create MSI, MSI-X, and Mem request sockets for each vfio device
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001759 let (vfio_host_tube_msi, vfio_device_tube_msi) =
1760 Tube::pair().map_err(Error::CreateTube)?;
1761 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msi));
Daniel Verkamp10154a92020-09-28 17:44:40 -07001762
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001763 let (vfio_host_tube_msix, vfio_device_tube_msix) =
1764 Tube::pair().map_err(Error::CreateTube)?;
1765 control_tubes.push(TaggedControlTube::VmIrq(vfio_host_tube_msix));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001766
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001767 let (vfio_host_tube_mem, vfio_device_tube_mem) =
1768 Tube::pair().map_err(Error::CreateTube)?;
1769 control_tubes.push(TaggedControlTube::VmMemory(vfio_host_tube_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001770
Zach Reiznerdc748482021-04-14 13:59:30 -07001771 let vfiodevice = VfioDevice::new(
1772 vfio_path.as_path(),
1773 vm,
1774 vm.get_memory(),
1775 vfio_container.clone(),
1776 )
1777 .map_err(Error::CreateVfioDevice)?;
Tomasz Jeznach502b5de2021-02-03 21:45:47 -08001778 let mut vfiopcidevice = Box::new(VfioPciDevice::new(
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001779 vfiodevice,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08001780 vfio_device_tube_msi,
1781 vfio_device_tube_msix,
1782 vfio_device_tube_mem,
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001783 ));
Tomasz Jeznach502b5de2021-02-03 21:45:47 -08001784 // early reservation for pass-through PCI devices.
1785 if vfiopcidevice.allocate_address(resources).is_err() {
1786 warn!(
1787 "address reservation failed for vfio {}",
1788 vfiopcidevice.debug_label()
1789 );
1790 }
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001791 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1792 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001793 }
1794
David Tolnay2b089fc2019-03-04 15:33:22 -08001795 Ok(pci_devices)
1796}
1797
1798#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001799#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001800struct Ids {
1801 uid: uid_t,
1802 gid: gid_t,
1803}
1804
David Tolnay48c48292019-03-01 16:54:25 -08001805// Set the uid/gid for the jailed process and give a basic id map. This is
1806// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001807fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001808 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1809
1810 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1811 Ok(u) => u,
1812 Err(e) => {
1813 warn!("falling back to current user id for {}: {}", feature, e);
1814 geteuid()
1815 }
1816 };
1817
1818 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1819 Ok(u) => u,
1820 Err(e) => {
1821 warn!("falling back to current group id for {}: {}", feature, e);
1822 getegid()
1823 }
1824 };
1825
1826 jail.change_uid(crosvm_uid);
1827 jail.change_gid(crosvm_gid);
1828 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1829 .map_err(Error::SettingUidMap)?;
1830 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1831 .map_err(Error::SettingGidMap)?;
1832
David Tolnay41a6f842019-03-01 16:18:44 -08001833 Ok(Ids {
1834 uid: crosvm_uid,
1835 gid: crosvm_gid,
1836 })
David Tolnay48c48292019-03-01 16:54:25 -08001837}
1838
Michael Hoylea596a072020-11-10 19:32:45 -08001839fn raw_descriptor_from_path(path: &Path) -> Result<RawDescriptor> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001840 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001841 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001842 }
Michael Hoylea596a072020-11-10 19:32:45 -08001843 let raw_descriptor = path
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001844 .file_name()
1845 .and_then(|fd_osstr| fd_osstr.to_str())
1846 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1847 .ok_or(Error::InvalidFdPath)?;
Michael Hoylea596a072020-11-10 19:32:45 -08001848 validate_raw_descriptor(raw_descriptor).map_err(Error::ValidateRawDescriptor)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001849}
1850
Zach Reizner65b98f12019-11-22 17:34:58 -08001851trait IntoUnixStream {
1852 fn into_unix_stream(self) -> Result<UnixStream>;
1853}
1854
1855impl<'a> IntoUnixStream for &'a Path {
1856 fn into_unix_stream(self) -> Result<UnixStream> {
1857 if self.parent() == Some(Path::new("/proc/self/fd")) {
1858 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -08001859 unsafe { Ok(UnixStream::from_raw_fd(raw_descriptor_from_path(self)?)) }
Zach Reizner65b98f12019-11-22 17:34:58 -08001860 } else {
1861 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1862 }
1863 }
1864}
1865impl<'a> IntoUnixStream for &'a PathBuf {
1866 fn into_unix_stream(self) -> Result<UnixStream> {
1867 self.as_path().into_unix_stream()
1868 }
1869}
1870
1871impl IntoUnixStream for UnixStream {
1872 fn into_unix_stream(self) -> Result<UnixStream> {
1873 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001874 }
1875}
1876
Steven Richmanf32d0b42020-06-20 21:45:32 -07001877fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1878 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001879 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001880 extern "C" fn handle_signal(_: c_int) {}
Matt Delco84cf9c02019-10-07 22:38:13 -07001881 // Our signal handler does nothing and is trivially async signal safe.
1882 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1883 .map_err(Error::RegisterSignalHandler)?;
1884 }
1885 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1886 } else {
1887 unsafe {
Allen Webb44c728c2021-03-23 15:22:41 -05001888 extern "C" fn handle_signal<T: Vcpu>(_: c_int) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001889 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001890 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001891 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001892 .map_err(Error::RegisterSignalHandler)?;
1893 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001894 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001895 Ok(())
1896}
1897
Steven Richmanf32d0b42020-06-20 21:45:32 -07001898// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001899fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001900 cpu_id: usize,
1901 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001902 vm: impl VmArch,
Zach Reiznerdc748482021-04-14 13:59:30 -07001903 irq_chip: &mut dyn IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001904 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001905 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001906 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001907 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001908 has_bios: bool,
1909 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001910) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001911where
Zach Reizner2c770e62020-09-30 16:49:59 -07001912 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001913{
Zach Reizner304e7312020-09-29 16:00:24 -07001914 let mut vcpu = match vcpu {
1915 Some(v) => v,
1916 None => {
1917 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1918 // the vcpu thread.
1919 match vm
1920 .create_vcpu(cpu_id)
1921 .map_err(Error::CreateVcpu)?
1922 .downcast::<V>()
1923 {
1924 Ok(v) => *v,
1925 Err(_) => panic!("VM created wrong type of VCPU"),
1926 }
1927 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001928 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001929
Steven Richmanf32d0b42020-06-20 21:45:32 -07001930 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001931 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001932 .map_err(Error::AddIrqChipVcpu)?;
1933
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001934 if !vcpu_affinity.is_empty() {
1935 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1936 error!("Failed to set CPU affinity: {}", e);
1937 }
1938 }
1939
Steven Richmanf32d0b42020-06-20 21:45:32 -07001940 Arch::configure_vcpu(
1941 vm.get_memory(),
1942 vm.get_hypervisor(),
1943 irq_chip,
1944 &mut vcpu,
1945 cpu_id,
1946 vcpu_count,
1947 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001948 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001949 )
1950 .map_err(Error::ConfigureVcpu)?;
1951
Zach Reizner026f72f2021-06-01 14:35:29 -07001952 if let Err(e) = enable_core_scheduling() {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001953 error!("Failed to enable core scheduling: {}", e);
1954 }
1955
Kansho Nishidaab205af2020-08-13 18:17:50 +09001956 if run_rt {
1957 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1958 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1959 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1960 {
1961 warn!("Failed to set vcpu to real time: {}", e);
1962 }
1963 }
1964
Steven Richmanf32d0b42020-06-20 21:45:32 -07001965 if use_hypervisor_signals {
1966 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1967 v.retain(|&x| x != SIGRTMIN() + 0);
1968 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1969 }
1970
Zach Reizner2c770e62020-09-30 16:49:59 -07001971 let vcpu_run_handle = vcpu
1972 .take_run_handle(Some(SIGRTMIN() + 0))
1973 .map_err(Error::RunnableVcpu)?;
1974
1975 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001976}
1977
/// Executes one GDB debug command `d` against `vcpu` and sends the outcome on `reply_tube`.
///
/// Every command yields exactly one `VcpuDebugStatusMessage` tagged with `cpu_id`:
/// register reads reply with `RegValues`, memory reads with `MemoryRegion`, and all other
/// commands with `CommandComplete`.
///
/// Returns `Error::HandleDebugCommand` if the architecture hook fails (no reply is sent in
/// that case) and `Error::SendDebugStatus` if the reply cannot be sent.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_tube: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // Run the command first; the reply is built and sent in a single place below.
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).map_err(Error::HandleDebugCommand)?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => {
            // Unlike the other commands, a failed read is not reported as an error: the reply
            // carries an empty region instead.
            VcpuDebugStatus::MemoryRegion(
                Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
            )
        }
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_tube
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .map_err(|e| Error::SendDebugStatus(Box::new(e)))
}
2053
Zach Reizner2c770e62020-09-30 16:49:59 -07002054fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002055 cpu_id: usize,
2056 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07002057 vm: impl VmArch + 'static,
Zach Reiznerdc748482021-04-14 13:59:30 -07002058 mut irq_chip: Box<dyn IrqChipArch + 'static>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002059 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002060 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002061 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002062 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002063 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002064 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07002065 io_bus: devices::Bus,
2066 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07002067 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002068 requires_pvclock_ctrl: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002069 from_main_tube: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002070 use_hypervisor_signals: bool,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002071 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_tube: Option<
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002072 mpsc::Sender<VcpuDebugStatusMessage>,
2073 >,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002074) -> Result<JoinHandle<()>>
2075where
Zach Reizner2c770e62020-09-30 16:49:59 -07002076 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002077{
Zach Reizner8fb52112017-12-13 16:04:39 -08002078 thread::Builder::new()
2079 .name(format!("crosvm_vcpu{}", cpu_id))
2080 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08002081 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
2082 // implementation accomplishes that.
2083 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
2084
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002085 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2086 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07002087 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002088 cpu_id,
2089 vcpu,
2090 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002091 irq_chip.as_mut(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002092 vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002093 run_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002094 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002095 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002096 has_bios,
2097 use_hypervisor_signals,
2098 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002099
Zach Reizner8fb52112017-12-13 16:04:39 -08002100 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002101
Zach Reizner2c770e62020-09-30 16:49:59 -07002102 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002103 Ok(v) => v,
2104 Err(e) => {
2105 error!("failed to start vcpu {}: {}", cpu_id, e);
2106 return;
2107 }
2108 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01002109
Dylan Reidb0492662019-05-17 14:50:13 -07002110 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002111 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002112 if to_gdb_tube.is_some() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002113 // Wait until a GDB client attaches
2114 run_mode = VmRunMode::Breakpoint;
2115 }
2116
Dylan Reidb0492662019-05-17 14:50:13 -07002117 let mut interrupted_by_signal = false;
2118
2119 'vcpu_loop: loop {
2120 // Start by checking for messages to process and the run state of the CPU.
2121 // An extra check here for Running so there isn't a need to call recv unless a
2122 // message is likely to be ready because a signal was sent.
2123 if interrupted_by_signal || run_mode != VmRunMode::Running {
2124 'state_loop: loop {
2125 // Tries to get a pending message without blocking first.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002126 let msg = match from_main_tube.try_recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002127 Ok(m) => m,
2128 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
2129 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002130 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07002131 break 'state_loop;
2132 }
2133 Err(mpsc::TryRecvError::Empty) => {
2134 // If the VM is not running, wait until a message is ready.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002135 match from_main_tube.recv() {
Dylan Reidb0492662019-05-17 14:50:13 -07002136 Ok(m) => m,
2137 Err(mpsc::RecvError) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002138 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002139 break 'vcpu_loop;
2140 }
2141 }
2142 }
2143 Err(mpsc::TryRecvError::Disconnected) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002144 error!("Failed to read from main tube in vcpu");
Dylan Reidb0492662019-05-17 14:50:13 -07002145 break 'vcpu_loop;
2146 }
2147 };
2148
2149 // Collect all pending messages.
2150 let mut messages = vec![msg];
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002151 messages.append(&mut from_main_tube.try_iter().collect());
Dylan Reidb0492662019-05-17 14:50:13 -07002152
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002153 for msg in messages {
2154 match msg {
2155 VcpuControl::RunState(new_mode) => {
2156 run_mode = new_mode;
2157 match run_mode {
2158 VmRunMode::Running => break 'state_loop,
2159 VmRunMode::Suspending => {
2160 // On KVM implementations that use a paravirtualized
2161 // clock (e.g. x86), a flag must be set to indicate to
2162 // the guest kernel that a vCPU was suspended. The guest
2163 // kernel will use this flag to prevent the soft lockup
2164 // detection from triggering when this vCPU resumes,
2165 // which could happen days later in realtime.
2166 if requires_pvclock_ctrl {
2167 if let Err(e) = vcpu.pvclock_ctrl() {
2168 error!(
2169 "failed to tell hypervisor vcpu {} is suspending: {}",
2170 cpu_id, e
2171 );
2172 }
2173 }
2174 }
2175 VmRunMode::Breakpoint => {}
2176 VmRunMode::Exiting => break 'vcpu_loop,
2177 }
2178 }
2179 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2180 VcpuControl::Debug(d) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002181 match &to_gdb_tube {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002182 Some(ref ch) => {
2183 if let Err(e) = handle_debug_msg(
2184 cpu_id, &vcpu, &guest_mem, d, &ch,
2185 ) {
2186 error!("Failed to handle gdb message: {}", e);
2187 }
2188 },
2189 None => {
2190 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07002191 }
2192 }
2193 }
Dylan Reidb0492662019-05-17 14:50:13 -07002194 }
2195 }
2196 }
2197 }
2198
2199 interrupted_by_signal = false;
2200
Steven Richman11dc6712020-09-02 15:39:14 -07002201 // Vcpus may have run a HLT instruction, which puts them into a state other than
2202 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
2203 // until either the irqchip receives an interrupt for this vcpu, or until the main
2204 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
2205 // implementations HLT instructions do not make it to crosvm, and thus this is a
2206 // no-op that always returns VcpuRunState::Runnable.
2207 match irq_chip.wait_until_runnable(&vcpu) {
2208 Ok(VcpuRunState::Runnable) => {}
2209 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
2210 Err(e) => error!(
2211 "error waiting for vcpu {} to become runnable: {}",
2212 cpu_id, e
2213 ),
2214 }
2215
2216 if !interrupted_by_signal {
2217 match vcpu.run(&vcpu_run_handle) {
2218 Ok(VcpuExit::IoIn { port, mut size }) => {
2219 let mut data = [0; 8];
2220 if size > data.len() {
2221 error!("unsupported IoIn size of {} bytes", size);
2222 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002223 }
Steven Richman11dc6712020-09-02 15:39:14 -07002224 io_bus.read(port as u64, &mut data[..size]);
2225 if let Err(e) = vcpu.set_data(&data[..size]) {
2226 error!("failed to set return data for IoIn: {}", e);
2227 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09002228 }
Steven Richman11dc6712020-09-02 15:39:14 -07002229 Ok(VcpuExit::IoOut {
2230 port,
2231 mut size,
2232 data,
2233 }) => {
2234 if size > data.len() {
2235 error!("unsupported IoOut size of {} bytes", size);
2236 size = data.len();
2237 }
2238 io_bus.write(port as u64, &data[..size]);
2239 }
2240 Ok(VcpuExit::MmioRead { address, size }) => {
2241 let mut data = [0; 8];
2242 mmio_bus.read(address, &mut data[..size]);
2243 // Setting data for mmio can not fail.
2244 let _ = vcpu.set_data(&data[..size]);
2245 }
2246 Ok(VcpuExit::MmioWrite {
2247 address,
2248 size,
2249 data,
2250 }) => {
2251 mmio_bus.write(address, &data[..size]);
2252 }
2253 Ok(VcpuExit::IoapicEoi { vector }) => {
2254 if let Err(e) = irq_chip.broadcast_eoi(vector) {
2255 error!(
2256 "failed to broadcast eoi {} on vcpu {}: {}",
2257 vector, cpu_id, e
2258 );
2259 }
2260 }
2261 Ok(VcpuExit::IrqWindowOpen) => {}
2262 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
2263 Ok(VcpuExit::Shutdown) => break,
2264 Ok(VcpuExit::FailEntry {
2265 hardware_entry_failure_reason,
2266 }) => {
2267 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002268 break;
2269 }
Steven Richman11dc6712020-09-02 15:39:14 -07002270 Ok(VcpuExit::SystemEvent(_, _)) => break,
2271 Ok(VcpuExit::Debug { .. }) => {
2272 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2273 {
2274 let msg = VcpuDebugStatusMessage {
2275 cpu: cpu_id as usize,
2276 msg: VcpuDebugStatus::HitBreakPoint,
2277 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002278 if let Some(ref ch) = to_gdb_tube {
Steven Richman11dc6712020-09-02 15:39:14 -07002279 if let Err(e) = ch.send(msg) {
2280 error!("failed to notify breakpoint to GDB thread: {}", e);
2281 break;
2282 }
2283 }
2284 run_mode = VmRunMode::Breakpoint;
2285 }
2286 }
2287 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
2288 Err(e) => match e.errno() {
2289 libc::EINTR => interrupted_by_signal = true,
2290 libc::EAGAIN => {}
2291 _ => {
2292 error!("vcpu hit unknown error: {}", e);
2293 break;
2294 }
2295 },
2296 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002297 }
2298
2299 if interrupted_by_signal {
2300 if use_hypervisor_signals {
2301 // Try to clear the signal that we use to kick VCPU if it is pending before
2302 // attempting to handle pause requests.
2303 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2304 error!("failed to clear pending signal: {}", e);
2305 break;
2306 }
2307 } else {
2308 vcpu.set_immediate_exit(false);
2309 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002310 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002311
Steven Richman11dc6712020-09-02 15:39:14 -07002312 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2313 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2314 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002315 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002316 })
2317 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002318}
2319
/// Reads the entire file at `path` and parses its whitespace-separated fields as `i64`s.
///
/// An empty (or all-whitespace) file yields an empty `Vec`.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read, and an
/// `InvalidData` error if the contents are not valid UTF-8 or any field fails to parse.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    // Read to end of file: a single `read` into a fixed-size buffer can stop short or cut a
    // field in half, which would silently drop or alter values.
    let mut content = String::new();
    File::open(path)?.read_to_string(&mut content)?;

    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
2339
2340// Reads the contents of a file and converts them into a u64, and if there
2341// are multiple fields it only returns the first one.
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002342fn file_to_i64<P: AsRef<Path>>(path: P, nth: usize) -> io::Result<i64> {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002343 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07002344 .into_iter()
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002345 .nth(nth)
Sonny Raod5f66082019-04-24 12:24:38 -07002346 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002347}
2348
Zach Reiznera90649a2021-03-31 12:56:08 -07002349fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
David Tolnay2b089fc2019-03-04 15:33:22 -08002350 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
2351 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002352 } else {
2353 None
2354 };
2355
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002356 let vm_image = match cfg.executable_path {
2357 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
2358 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
2359 ),
2360 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
2361 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
2362 ),
2363 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2364 };
2365
Zach Reiznera90649a2021-03-31 12:56:08 -07002366 Ok(VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002367 memory_size: cfg
2368 .memory
2369 .unwrap_or(256)
2370 .checked_mul(1024 * 1024)
2371 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07002372 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002373 vcpu_affinity: cfg.vcpu_affinity.clone(),
Daniel Verkamp8a72afc2021-03-15 17:55:52 -07002374 cpu_clusters: cfg.cpu_clusters.clone(),
2375 cpu_capacity: cfg.cpu_capacity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002376 no_smt: cfg.no_smt,
Sergey Senozhatsky1e369c52021-04-13 20:23:51 +09002377 hugepages: cfg.hugepages,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002378 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002379 android_fstab: cfg
2380 .android_fstab
2381 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002382 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002383 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002384 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002385 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002386 extra_kernel_params: cfg.params.clone(),
2387 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07002388 acpi_sdts: cfg
2389 .acpi_tables
2390 .iter()
2391 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2392 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002393 rt_cpus: cfg.rt_cpus.clone(),
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002394 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002395 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
Zach Reiznera90649a2021-03-31 12:56:08 -07002396 gdb: None,
Tomasz Jeznachccb26942021-03-30 22:44:11 -07002397 dmi_path: cfg.dmi_path.clone(),
Tomasz Jeznachd93c29f2021-04-12 11:00:24 -07002398 no_legacy: cfg.no_legacy,
Zach Reiznera90649a2021-03-31 12:56:08 -07002399 })
2400}
2401
Zach Reiznerdc748482021-04-14 13:59:30 -07002402pub fn run_config(cfg: Config) -> Result<()> {
2403 let components = setup_vm_components(&cfg)?;
2404
2405 let guest_mem_layout =
2406 Arch::guest_memory_layout(&components).map_err(Error::GuestMemoryLayout)?;
2407 let guest_mem = GuestMemory::new(&guest_mem_layout).map_err(Error::CreateGuestMemory)?;
2408 let mut mem_policy = MemoryPolicy::empty();
2409 if components.hugepages {
2410 mem_policy |= MemoryPolicy::USE_HUGEPAGES;
2411 }
2412 guest_mem.set_memory_policy(mem_policy);
2413 let kvm = Kvm::new_with_path(&cfg.kvm_device_path).map_err(Error::CreateKvm)?;
2414 let vm = KvmVm::new(&kvm, guest_mem).map_err(Error::CreateVm)?;
2415 let vm_clone = vm.try_clone().map_err(Error::CreateVm)?;
2416
2417 enum KvmIrqChip {
2418 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2419 Split(KvmSplitIrqChip),
2420 Kernel(KvmKernelIrqChip),
2421 }
2422
2423 impl KvmIrqChip {
2424 fn as_mut(&mut self) -> &mut dyn IrqChipArch {
2425 match self {
2426 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2427 KvmIrqChip::Split(i) => i,
2428 KvmIrqChip::Kernel(i) => i,
2429 }
2430 }
2431 }
2432
2433 let ioapic_host_tube;
2434 let mut irq_chip = if cfg.split_irqchip {
2435 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2436 unimplemented!("KVM split irqchip mode only supported on x86 processors");
2437 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2438 {
2439 let (host_tube, ioapic_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2440 ioapic_host_tube = Some(host_tube);
2441 KvmIrqChip::Split(
2442 KvmSplitIrqChip::new(
2443 vm_clone,
2444 components.vcpu_count,
2445 ioapic_device_tube,
2446 Some(120),
2447 )
2448 .map_err(Error::CreateIrqChip)?,
2449 )
2450 }
2451 } else {
2452 ioapic_host_tube = None;
2453 KvmIrqChip::Kernel(
2454 KvmKernelIrqChip::new(vm_clone, components.vcpu_count).map_err(Error::CreateIrqChip)?,
2455 )
2456 };
2457
2458 run_vm::<KvmVcpu, KvmVm>(cfg, components, vm, irq_chip.as_mut(), ioapic_host_tube)
2459}
2460
2461fn run_vm<Vcpu, V>(
Zach Reiznera90649a2021-03-31 12:56:08 -07002462 cfg: Config,
2463 #[allow(unused_mut)] mut components: VmComponents,
Zach Reiznerdc748482021-04-14 13:59:30 -07002464 mut vm: V,
2465 irq_chip: &mut dyn IrqChipArch,
2466 ioapic_host_tube: Option<Tube>,
Zach Reiznera90649a2021-03-31 12:56:08 -07002467) -> Result<()>
2468where
2469 Vcpu: VcpuArch + 'static,
2470 V: VmArch + 'static,
Zach Reiznera90649a2021-03-31 12:56:08 -07002471{
2472 if cfg.sandbox {
2473 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2474 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2475 // access to those files will not be possible.
2476 info!("crosvm entering multiprocess mode");
2477 }
2478
Daniel Verkampf1439d42021-05-21 13:55:10 -07002479 #[cfg(feature = "usb")]
Zach Reiznera90649a2021-03-31 12:56:08 -07002480 let (usb_control_tube, usb_provider) =
2481 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Daniel Verkampf1439d42021-05-21 13:55:10 -07002482
Zach Reiznera90649a2021-03-31 12:56:08 -07002483 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2484 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2485 // quickly.
2486 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002487
Zach Reiznera60744b2019-02-13 17:33:32 -08002488 let control_server_socket = match &cfg.socket_path {
2489 Some(path) => Some(UnlinkUnixSeqpacketListener(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002490 UnixSeqpacketListener::bind(path).map_err(Error::CreateControlServer)?,
Zach Reiznera60744b2019-02-13 17:33:32 -08002491 )),
2492 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002493 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002494
Zach Reiznera90649a2021-03-31 12:56:08 -07002495 let mut control_tubes = Vec::new();
2496
2497 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2498 if let Some(port) = cfg.gdb {
2499 // GDB needs a control socket to interrupt vcpus.
2500 let (gdb_host_tube, gdb_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
2501 control_tubes.push(TaggedControlTube::Vm(gdb_host_tube));
2502 components.gdb = Some((port, gdb_control_tube));
2503 }
2504
Chirantan Ekbote2ee9dcd2021-05-26 18:21:44 +09002505 for wl_cfg in &cfg.vhost_user_wl {
2506 let wayland_host_tube = UnixSeqpacket::connect(&wl_cfg.vm_tube)
2507 .map(Tube::new)
2508 .map_err(Error::ConnectTube)?;
2509 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
2510 }
2511
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002512 let (wayland_host_tube, wayland_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2513 control_tubes.push(TaggedControlTube::VmMemory(wayland_host_tube));
Dylan Reid059a1882018-07-23 17:58:09 -07002514 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002515 let (balloon_host_tube, balloon_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002516
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002517 // Create one control socket per disk.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002518 let mut disk_device_tubes = Vec::new();
2519 let mut disk_host_tubes = Vec::new();
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002520 let disk_count = cfg.disks.len();
2521 for _ in 0..disk_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002522 let (disk_host_tub, disk_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2523 disk_host_tubes.push(disk_host_tub);
2524 disk_device_tubes.push(disk_device_tube);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002525 }
2526
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002527 let mut pmem_device_tubes = Vec::new();
Daniel Verkampe1980a92020-02-07 11:00:55 -08002528 let pmem_count = cfg.pmem_devices.len();
2529 for _ in 0..pmem_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002530 let (pmem_host_tube, pmem_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2531 pmem_device_tubes.push(pmem_device_tube);
2532 control_tubes.push(TaggedControlTube::VmMsync(pmem_host_tube));
Daniel Verkampe1980a92020-02-07 11:00:55 -08002533 }
2534
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002535 let (gpu_host_tube, gpu_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2536 control_tubes.push(TaggedControlTube::VmMemory(gpu_host_tube));
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002537
Zach Reiznerdc748482021-04-14 13:59:30 -07002538 if let Some(ioapic_host_tube) = ioapic_host_tube {
2539 control_tubes.push(TaggedControlTube::VmIrq(ioapic_host_tube));
2540 }
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002541
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002542 let battery = if cfg.battery_type.is_some() {
Alex Lauf408c732020-11-10 18:24:04 +09002543 let jail = match simple_jail(&cfg, "battery")? {
2544 #[cfg_attr(not(feature = "powerd-monitor-powerd"), allow(unused_mut))]
2545 Some(mut jail) => {
2546 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2547 #[cfg(feature = "power-monitor-powerd")]
2548 {
2549 add_crosvm_user_to_jail(&mut jail, "battery")?;
2550
2551 // Create a tmpfs in the device's root directory so that we can bind mount files.
2552 jail.mount_with_data(
2553 Path::new("none"),
2554 Path::new("/"),
2555 "tmpfs",
2556 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2557 "size=67108864",
2558 )?;
2559
2560 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2561 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2562 }
2563 Some(jail)
2564 }
2565 None => None,
2566 };
2567 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002568 } else {
2569 (&cfg.battery_type, None)
2570 };
2571
Gurchetan Singh293913c2020-12-09 10:44:13 -08002572 let gralloc = RutabagaGralloc::new().map_err(Error::CreateGrallocError)?;
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002573 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2574
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002575 let fs_count = cfg
2576 .shared_dirs
2577 .iter()
2578 .filter(|sd| sd.kind == SharedDirKind::FS)
2579 .count();
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002580 let mut fs_device_tubes = Vec::with_capacity(fs_count);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002581 for _ in 0..fs_count {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002582 let (fs_host_tube, fs_device_tube) = Tube::pair().map_err(Error::CreateTube)?;
2583 control_tubes.push(TaggedControlTube::Fs(fs_host_tube));
2584 fs_device_tubes.push(fs_device_tube);
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09002585 }
2586
Zach Reiznerdc748482021-04-14 13:59:30 -07002587 let exit_evt = Event::new().map_err(Error::CreateEvent)?;
2588 let mut sys_allocator = Arch::create_system_allocator(vm.get_memory());
2589 let pci_devices = create_devices(
2590 &cfg,
2591 &mut vm,
2592 &mut sys_allocator,
2593 &exit_evt,
2594 &mut control_tubes,
2595 wayland_device_tube,
2596 gpu_device_tube,
2597 balloon_device_tube,
2598 &mut disk_device_tubes,
2599 &mut pmem_device_tubes,
2600 &mut fs_device_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002601 #[cfg(feature = "usb")]
Zach Reiznerdc748482021-04-14 13:59:30 -07002602 usb_provider,
2603 Arc::clone(&map_request),
2604 )?;
2605
Kuo-Hsin Yang6139da62021-04-14 16:55:24 +08002606 #[cfg_attr(not(feature = "direct"), allow(unused_mut))]
Zach Reiznerdc748482021-04-14 13:59:30 -07002607 let mut linux = Arch::build_vm::<V, Vcpu>(
Trent Begin17ccaad2019-04-17 13:51:25 -06002608 components,
Zach Reiznerdc748482021-04-14 13:59:30 -07002609 &exit_evt,
2610 &mut sys_allocator,
Trent Begin17ccaad2019-04-17 13:51:25 -06002611 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002612 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002613 battery,
Zach Reiznera90649a2021-03-31 12:56:08 -07002614 vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07002615 pci_devices,
2616 irq_chip,
Trent Begin17ccaad2019-04-17 13:51:25 -06002617 )
David Tolnaybe034262019-03-04 17:48:36 -08002618 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002619
Tomasz Jeznach3ce74762021-02-26 01:01:53 -08002620 #[cfg(feature = "direct")]
2621 if let Some(pmio) = &cfg.direct_pmio {
2622 let direct_io =
2623 Arc::new(devices::DirectIo::new(&pmio.path, false).map_err(Error::DirectIo)?);
2624 for range in pmio.ranges.iter() {
2625 linux
2626 .io_bus
2627 .insert_sync(direct_io.clone(), range.0, range.1)
2628 .unwrap();
2629 }
2630 };
2631
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002632 #[cfg(feature = "direct")]
2633 let mut irqs = Vec::new();
2634
2635 #[cfg(feature = "direct")]
2636 for irq in &cfg.direct_level_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002637 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002638 warn!("irq {} already reserved.", irq);
2639 }
2640 let trigger = Event::new().map_err(Error::CreateEvent)?;
2641 let resample = Event::new().map_err(Error::CreateEvent)?;
2642 linux
2643 .irq_chip
2644 .register_irq_event(*irq, &trigger, Some(&resample))
2645 .unwrap();
2646 let direct_irq =
2647 devices::DirectIrq::new(trigger, Some(resample)).map_err(Error::DirectIrq)?;
2648 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2649 irqs.push(direct_irq);
2650 }
2651
2652 #[cfg(feature = "direct")]
2653 for irq in &cfg.direct_edge_irq {
Zach Reiznerdc748482021-04-14 13:59:30 -07002654 if !sys_allocator.reserve_irq(*irq) {
Tomasz Jeznach7271f752021-03-04 01:44:06 -08002655 warn!("irq {} already reserved.", irq);
2656 }
2657 let trigger = Event::new().map_err(Error::CreateEvent)?;
2658 linux
2659 .irq_chip
2660 .register_irq_event(*irq, &trigger, None)
2661 .unwrap();
2662 let direct_irq = devices::DirectIrq::new(trigger, None).map_err(Error::DirectIrq)?;
2663 direct_irq.irq_enable(*irq).map_err(Error::DirectIrq)?;
2664 irqs.push(direct_irq);
2665 }
2666
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002667 run_control(
2668 linux,
Zach Reiznerdc748482021-04-14 13:59:30 -07002669 sys_allocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002670 control_server_socket,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002671 control_tubes,
2672 balloon_host_tube,
2673 &disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07002674 #[cfg(feature = "usb")]
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002675 usb_control_tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002676 exit_evt,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002677 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002678 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002679 Arc::clone(&map_request),
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002680 cfg.balloon_bias,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002681 gralloc,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002682 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002683}
2684
Daniel Verkamp29409802021-02-24 14:46:19 -08002685/// Signals all running VCPUs to vmexit, sends VcpuControl message to each VCPU tube, and tells
2686/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
Steven Richman11dc6712020-09-02 15:39:14 -07002687/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2688/// loop.
2689fn kick_all_vcpus(
2690 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
Zach Reiznerdc748482021-04-14 13:59:30 -07002691 irq_chip: &dyn IrqChip,
Daniel Verkamp29409802021-02-24 14:46:19 -08002692 message: VcpuControl,
Steven Richman11dc6712020-09-02 15:39:14 -07002693) {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002694 for (handle, tube) in vcpu_handles {
Daniel Verkamp29409802021-02-24 14:46:19 -08002695 if let Err(e) = tube.send(message.clone()) {
2696 error!("failed to send VcpuControl: {}", e);
Steven Richman11dc6712020-09-02 15:39:14 -07002697 }
2698 let _ = handle.kill(SIGRTMIN() + 0);
2699 }
2700 irq_chip.kick_halted_vcpus();
2701}
2702
// BalloonPolicy determines the size to set the balloon.
// It stores the host and guest thresholds plus the observations carried over from the previous
// call to `delta`. Memory quantities appear to be in bytes: `new` and its caller scale MB values
// by `ONE_MB`, and the log output divides by `ONE_MB` again.
struct BalloonPolicy {
    // Estimate for when the guest starts aggressively freeing memory.
    critical_guest_available: i64,
    critical_host_available: i64, // ChromeOS critical margin.
    // Multiplied by the balloon fullness percentage, divided by 100, and then subtracted from the
    // guest's available memory above its critical level.
    guest_available_bias: i64,
    // Seeded with an estimate in `new`, then raised whenever a larger balloon is seen.
    max_balloon_actual: i64, // The largest the balloon has ever been observed.
    prev_balloon_full_percent: i64, // How full was the balloon at the previous timestep.
    prev_guest_available: i64, // Available memory in the guest at the previous timestep.
}
2713
// Byte-size units used by the balloon policy arithmetic and its log output.
const ONE_KB: i64 = 1024;
const ONE_MB: i64 = 1024 * ONE_KB;

// ChromeOS low-memory sysfs files. Values read from both are multiplied by `ONE_MB` before use:
// `LOWMEM_AVAILABLE` is re-read on every `BalloonPolicy::delta` call as the host's available
// memory, and `LOWMEM_MARGIN` is read once in `run_control` to obtain the host critical margin
// (the balloon policy is only created when that read succeeds).
const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
2719
2720// BalloonPolicy implements the virtio balloon sizing logic.
2721// The balloon is sized with the following heuristics:
2722// Balance Available
2723// The balloon is sized to balance the amount of available memory above a
2724// critical margin. The critical margin is the level at which memory is
2725// freed. In the host, this is the ChromeOS available critical margin, which
2726// is the trigger to kill tabs. In the guest, we estimate this level by
2727// tracking the minimum amount of available memory, discounting sharp
2728// 'valleys'. If the guest manages to keep available memory above a given
2729// level even with some pressure, then we determine that this is the
2730// 'critical' level for the guest. We don't update this critical value if
2731// the balloon is fully inflated because in that case, the guest may be out
2732// of memory to free.
2733// guest_available_bias
2734// Even if available memory is perfectly balanced between host and guest,
2735// The size of the balloon will still drift randomly depending on whether
2736// those host or guest reclaims memory first/faster every time memory is
2737// low. To encourage large balloons to shrink and small balloons to grow,
2738// the following bias is added to the guest critical margin:
2739// (guest_available_bias * balloon_full_percent) / 100
2740// This give the guest more memory when the balloon is full.
2741impl BalloonPolicy {
2742 fn new(
2743 memory_size: i64,
2744 critical_host_available: i64,
2745 guest_available_bias: i64,
2746 ) -> BalloonPolicy {
2747 // Estimate some reasonable initial maximum for balloon size.
2748 let max_balloon_actual = (memory_size * 3) / 4;
2749 // 400MB is above the zone min margin even for Crostini VMs on 16GB
2750 // devices (~85MB), and is above when Android Low Memory Killer kills
2751 // apps (~250MB).
2752 let critical_guest_available = 400 * ONE_MB;
2753
2754 BalloonPolicy {
2755 critical_guest_available,
2756 critical_host_available,
2757 guest_available_bias,
2758 max_balloon_actual,
2759 prev_balloon_full_percent: 0,
2760 prev_guest_available: 0,
2761 }
2762 }
2763 fn delta(&mut self, stats: BalloonStats, balloon_actual_u: u64) -> Result<i64> {
2764 let guest_free = stats
2765 .free_memory
2766 .map(i64::try_from)
2767 .ok_or(Error::GuestFreeMissing())?
2768 .map_err(Error::GuestFreeTooLarge)?;
2769 let guest_cached = stats
2770 .disk_caches
2771 .map(i64::try_from)
2772 .ok_or(Error::GuestFreeMissing())?
2773 .map_err(Error::GuestFreeTooLarge)?;
2774 let balloon_actual = match balloon_actual_u {
2775 size if size < i64::max_value() as u64 => size as i64,
2776 _ => return Err(Error::BalloonActualTooLarge),
2777 };
2778 let guest_available = guest_free + guest_cached;
2779 // Available memory is reported in MB, and we need bytes.
2780 let host_available =
2781 file_to_i64(LOWMEM_AVAILABLE, 0).map_err(Error::ReadMemAvailable)? * ONE_MB;
2782 if self.max_balloon_actual < balloon_actual {
2783 self.max_balloon_actual = balloon_actual;
2784 info!(
2785 "balloon updated max_balloon_actual to {} MiB",
2786 self.max_balloon_actual / ONE_MB,
2787 );
2788 }
2789 let balloon_full_percent = balloon_actual * 100 / self.max_balloon_actual;
2790 // Update critical_guest_available if we see a lower available with the
2791 // balloon not fully inflated. If the balloon is completely inflated
2792 // there is a risk that the low available level we see comes at the cost
2793 // of stability. The Linux OOM Killer might have been forced to kill
2794 // something important, or page reclaim was so aggressive that there are
2795 // long UI hangs.
2796 if guest_available < self.critical_guest_available && balloon_full_percent < 95 {
2797 // To ignore temporary low memory states, we require that two guest
2798 // available measurements in a row are low.
2799 if self.prev_guest_available < self.critical_guest_available
2800 && self.prev_balloon_full_percent < 95
2801 {
2802 self.critical_guest_available = self.prev_guest_available;
2803 info!(
2804 "balloon updated critical_guest_available to {} MiB",
2805 self.critical_guest_available / ONE_MB,
2806 );
2807 }
2808 }
2809
2810 // Compute the difference in available memory above the host and guest
2811 // critical thresholds.
2812 let bias = (self.guest_available_bias * balloon_full_percent) / 100;
2813 let guest_above_critical = guest_available - self.critical_guest_available - bias;
2814 let host_above_critical = host_available - self.critical_host_available;
2815 let balloon_delta = guest_above_critical - host_above_critical;
2816 // Only let the balloon take up MAX_CRITICAL_DELTA of available memory
2817 // below the critical level in host or guest.
2818 const MAX_CRITICAL_DELTA: i64 = 10 * ONE_MB;
2819 let balloon_delta_capped = if balloon_delta < 0 {
2820 // The balloon is deflating, taking memory from the host. Don't let
2821 // it take more than the amount of available memory above the
2822 // critical margin, plus MAX_CRITICAL_DELTA.
2823 max(
2824 balloon_delta,
2825 -(host_available - self.critical_host_available + MAX_CRITICAL_DELTA),
2826 )
2827 } else {
2828 // The balloon is inflating, taking memory from the guest. Don't let
2829 // it take more than the amount of available memory above the
2830 // critical margin, plus MAX_CRITICAL_DELTA.
2831 min(
2832 balloon_delta,
2833 guest_available - self.critical_guest_available + MAX_CRITICAL_DELTA,
2834 )
2835 };
2836
2837 self.prev_balloon_full_percent = balloon_full_percent;
2838 self.prev_guest_available = guest_available;
2839
2840 // Only return a value if target would change available above critical
2841 // by more than 1%, or we are within 1 MB of critical in host or guest.
2842 if guest_above_critical < ONE_MB
2843 || host_above_critical < ONE_MB
2844 || (balloon_delta.abs() * 100) / guest_above_critical > 1
2845 || (balloon_delta.abs() * 100) / host_above_critical > 1
2846 {
2847 // Finally, make sure the balloon delta won't cause a negative size.
2848 let result = max(balloon_delta_capped, -balloon_actual);
2849 if result != 0 {
2850 info!(
2851 "balloon delta={:<6} ha={:<6} hc={:<6} ga={:<6} gc={:<6} bias={:<6} full={:>3}%",
2852 result / ONE_MB,
2853 host_available / ONE_MB,
2854 self.critical_host_available / ONE_MB,
2855 guest_available / ONE_MB,
2856 self.critical_guest_available / ONE_MB,
2857 bias / ONE_MB,
2858 balloon_full_percent,
2859 );
2860 }
2861 return Ok(result);
2862 }
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002863 Ok(0)
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002864 }
2865}
2866
Zach Reiznerdc748482021-04-14 13:59:30 -07002867fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
2868 mut linux: RunnableLinuxVm<V, Vcpu>,
2869 mut sys_allocator: SystemAllocator,
Zach Reiznera60744b2019-02-13 17:33:32 -08002870 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002871 mut control_tubes: Vec<TaggedControlTube>,
2872 balloon_host_tube: Tube,
2873 disk_host_tubes: &[Tube],
Daniel Verkampf1439d42021-05-21 13:55:10 -07002874 #[cfg(feature = "usb")] usb_control_tube: Tube,
Zach Reiznerdc748482021-04-14 13:59:30 -07002875 exit_evt: Event,
Zach Reizner55a9e502018-10-03 10:22:32 -07002876 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002877 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002878 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002879 balloon_bias: i64,
Gurchetan Singh293913c2020-12-09 10:44:13 -08002880 mut gralloc: RutabagaGralloc,
Zach Reizner55a9e502018-10-03 10:22:32 -07002881) -> Result<()> {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002882 #[derive(PollToken)]
2883 enum Token {
2884 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002885 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002886 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002887 IrqFd { index: IrqEventIndex },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002888 BalanceMemory,
2889 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002890 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002891 VmControl { index: usize },
2892 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002893
Zach Reizner19ad1f32019-12-12 18:58:50 -08002894 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002895 .set_raw_mode()
2896 .expect("failed to set terminal raw mode");
2897
Michael Hoylee392c462020-10-07 03:29:24 -07002898 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerdc748482021-04-14 13:59:30 -07002899 (&exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002900 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002901 (&sigchld_fd, Token::ChildSignal),
2902 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002903 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002904
Zach Reiznera60744b2019-02-13 17:33:32 -08002905 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002906 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002907 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002908 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002909 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002910 for (index, socket) in control_tubes.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002911 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002912 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002913 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002914 }
2915
Steven Richmanf32d0b42020-06-20 21:45:32 -07002916 let events = linux
2917 .irq_chip
2918 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002919 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002920
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002921 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002922 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002923 .add(&evt, Token::IrqFd { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002924 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002925 }
2926
Charles William Dick0bf8a552019-10-29 15:36:01 +09002927 // Balance available memory between guest and host every second.
Michael Hoyle08d86a42020-08-19 14:45:21 -07002928 let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002929 let mut balloon_policy = if let Ok(critical_margin) = file_to_i64(LOWMEM_MARGIN, 0) {
Charles William Dick0bf8a552019-10-29 15:36:01 +09002930 // Create timer request balloon stats every 1s.
Michael Hoylee392c462020-10-07 03:29:24 -07002931 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002932 .add(&balancemem_timer, Token::BalanceMemory)
Michael Hoylee392c462020-10-07 03:29:24 -07002933 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002934 let balancemem_dur = Duration::from_secs(1);
2935 let balancemem_int = Duration::from_secs(1);
2936 balancemem_timer
2937 .reset(balancemem_dur, Some(balancemem_int))
Michael Hoyle08d86a42020-08-19 14:45:21 -07002938 .map_err(Error::ResetTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002939
2940 // Listen for balloon statistics from the guest so we can balance.
Michael Hoylee392c462020-10-07 03:29:24 -07002941 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08002942 .add(&balloon_host_tube, Token::BalloonResult)
Michael Hoylee392c462020-10-07 03:29:24 -07002943 .map_err(Error::WaitContextAdd)?;
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002944 Some(BalloonPolicy::new(
2945 linux.vm.get_memory().memory_size() as i64,
2946 critical_margin * ONE_MB,
2947 balloon_bias,
2948 ))
Charles William Dick0bf8a552019-10-29 15:36:01 +09002949 } else {
Charles William Dick0e3d4b62020-12-14 12:16:46 +09002950 warn!("Unable to open low mem margin, maybe not a chrome os kernel");
2951 None
2952 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002953
Lepton Wu20333e42019-03-14 10:48:03 -07002954 if sandbox {
2955 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2956 drop_capabilities().map_err(Error::DropCapabilities)?;
2957 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002958
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002959 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2960 // Create a channel for GDB thread.
2961 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2962 let (s, r) = mpsc::channel();
2963 (Some(s), Some(r))
2964 } else {
2965 (None, None)
2966 };
2967
Steven Richmanf32d0b42020-06-20 21:45:32 -07002968 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2969 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002970 let use_hypervisor_signals = !linux
2971 .vm
2972 .get_hypervisor()
2973 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002974 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002975
Zach Reizner304e7312020-09-29 16:00:24 -07002976 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Andrew Walbran9cfdbd92021-01-11 17:40:34 +00002977 Some(vec) => vec.into_iter().map(Some).collect(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002978 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2979 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002980 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07002981 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07002982 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2983 Some(VcpuAffinity::Global(v)) => v,
2984 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2985 None => Default::default(),
2986 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002987 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002988 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002989 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002990 linux.vm.try_clone().map_err(Error::CloneEvent)?,
Zach Reiznerdc748482021-04-14 13:59:30 -07002991 linux.irq_chip.try_box_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002992 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002993 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002994 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002995 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002996 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002997 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002998 linux.io_bus.clone(),
2999 linux.mmio_bus.clone(),
Zach Reiznerdc748482021-04-14 13:59:30 -07003000 exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003001 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07003002 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003003 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003004 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3005 to_gdb_channel.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07003006 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07003007 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07003008 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003009
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003010 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
3011 // Spawn GDB thread.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003012 if let Some((gdb_port_num, gdb_control_tube)) = linux.gdb.take() {
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003013 let to_vcpu_channels = vcpu_handles
3014 .iter()
3015 .map(|(_handle, channel)| channel.clone())
3016 .collect();
3017 let target = GdbStub::new(
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003018 gdb_control_tube,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003019 to_vcpu_channels,
3020 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
3021 );
3022 thread::Builder::new()
3023 .name("gdb".to_owned())
3024 .spawn(move || gdb_thread(target, gdb_port_num))
3025 .map_err(Error::SpawnGdbServer)?;
3026 };
3027
Dylan Reid059a1882018-07-23 17:58:09 -07003028 vcpu_thread_barrier.wait();
3029
Michael Hoylee392c462020-10-07 03:29:24 -07003030 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003031 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07003032 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003033 Ok(v) => v,
3034 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08003035 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003036 break;
3037 }
3038 }
3039 };
Zach Reiznera60744b2019-02-13 17:33:32 -08003040
Steven Richmanf32d0b42020-06-20 21:45:32 -07003041 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
3042 warn!("can't deliver delayed irqs: {}", e);
3043 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003044
Zach Reiznera60744b2019-02-13 17:33:32 -08003045 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07003046 for event in events.iter().filter(|e| e.is_readable) {
3047 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07003048 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003049 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07003050 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003051 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003052 Token::Suspend => {
3053 info!("VM requested suspend");
3054 linux.suspend_evt.read().unwrap();
Zach Reiznerdc748482021-04-14 13:59:30 -07003055 kick_all_vcpus(
3056 &vcpu_handles,
3057 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003058 VcpuControl::RunState(VmRunMode::Suspending),
Zach Reiznerdc748482021-04-14 13:59:30 -07003059 );
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003060 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003061 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08003062 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08003063 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08003064 let pid = siginfo.ssi_pid;
3065 let pid_label = match linux.pid_debug_label_map.get(&pid) {
3066 Some(label) => format!("{} (pid {})", label, pid),
3067 None => format!("pid {}", pid),
3068 };
David Tolnayf5032762018-12-03 10:46:45 -08003069 error!(
3070 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08003071 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08003072 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08003073 }
Michael Hoylee392c462020-10-07 03:29:24 -07003074 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08003075 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003076 Token::IrqFd { index } => {
3077 if let Err(e) = linux.irq_chip.service_irq_event(index) {
3078 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08003079 }
3080 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09003081 Token::BalanceMemory => {
Michael Hoyle08d86a42020-08-19 14:45:21 -07003082 balancemem_timer.wait().map_err(Error::Timer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09003083 let command = BalloonControlCommand::Stats {};
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003084 if let Err(e) = balloon_host_tube.send(&command) {
Charles William Dick0bf8a552019-10-29 15:36:01 +09003085 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07003086 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07003087 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09003088 Token::BalloonResult => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003089 match balloon_host_tube.recv() {
Charles William Dick0bf8a552019-10-29 15:36:01 +09003090 Ok(BalloonControlResult::Stats {
3091 stats,
3092 balloon_actual: balloon_actual_u,
3093 }) => {
Charles William Dick0e3d4b62020-12-14 12:16:46 +09003094 match balloon_policy
3095 .as_mut()
3096 .map(|p| p.delta(stats, balloon_actual_u))
3097 {
3098 None => {
3099 error!(
3100 "got result from balloon stats, but no policy is running"
3101 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09003102 }
Charles William Dick0e3d4b62020-12-14 12:16:46 +09003103 Some(Err(e)) => {
3104 warn!("failed to run balloon policy {}", e);
3105 }
3106 Some(Ok(delta)) if delta != 0 => {
3107 let target = max((balloon_actual_u as i64) + delta, 0) as u64;
3108 let command =
3109 BalloonControlCommand::Adjust { num_bytes: target };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003110 if let Err(e) = balloon_host_tube.send(&command) {
Charles William Dick0e3d4b62020-12-14 12:16:46 +09003111 warn!(
3112 "failed to send memory value to balloon device: {}",
3113 e
3114 );
3115 }
3116 }
3117 Some(Ok(_)) => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09003118 }
3119 }
3120 Err(e) => {
3121 error!("failed to recv BalloonControlResult: {}", e);
3122 }
3123 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07003124 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003125 Token::VmControlServer => {
3126 if let Some(socket_server) = &control_server_socket {
3127 match socket_server.accept() {
3128 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07003129 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08003130 .add(
3131 &socket,
3132 Token::VmControl {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003133 index: control_tubes.len(),
Zach Reiznera60744b2019-02-13 17:33:32 -08003134 },
3135 )
Michael Hoylee392c462020-10-07 03:29:24 -07003136 .map_err(Error::WaitContextAdd)?;
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003137 control_tubes.push(TaggedControlTube::Vm(Tube::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08003138 }
3139 Err(e) => error!("failed to accept socket: {}", e),
3140 }
3141 }
3142 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003143 Token::VmControl { index } => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003144 if let Some(socket) = control_tubes.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003145 match socket {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003146 TaggedControlTube::Vm(tube) => match tube.recv::<VmRequest>() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003147 Ok(request) => {
3148 let mut run_mode_opt = None;
3149 let response = request.execute(
3150 &mut run_mode_opt,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003151 &balloon_host_tube,
3152 disk_host_tubes,
Daniel Verkampf1439d42021-05-21 13:55:10 -07003153 #[cfg(feature = "usb")]
3154 Some(&usb_control_tube),
3155 #[cfg(not(feature = "usb"))]
3156 None,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08003157 &mut linux.bat_control,
Jakub Starond99cd0a2019-04-11 14:09:39 -07003158 );
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003159 if let Err(e) = tube.send(&response) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003160 error!("failed to send VmResponse: {}", e);
3161 }
3162 if let Some(run_mode) = run_mode_opt {
3163 info!("control socket changed run mode to {}", run_mode);
3164 match run_mode {
3165 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07003166 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07003167 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09003168 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08003169 if other == VmRunMode::Running {
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003170 linux.io_bus.notify_resume();
3171 }
Steven Richman11dc6712020-09-02 15:39:14 -07003172 kick_all_vcpus(
3173 &vcpu_handles,
Zach Reiznerdc748482021-04-14 13:59:30 -07003174 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003175 VcpuControl::RunState(other),
Steven Richman11dc6712020-09-02 15:39:14 -07003176 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08003177 }
3178 }
3179 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003180 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003181 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003182 if let TubeError::Disconnected = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07003183 vm_control_indices_to_remove.push(index);
3184 } else {
3185 error!("failed to recv VmRequest: {}", e);
3186 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003187 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003188 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003189 TaggedControlTube::VmMemory(tube) => {
3190 match tube.recv::<VmMemoryRequest>() {
3191 Ok(request) => {
3192 let response = request.execute(
3193 &mut linux.vm,
Zach Reiznerdc748482021-04-14 13:59:30 -07003194 &mut sys_allocator,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003195 Arc::clone(&map_request),
3196 &mut gralloc,
3197 );
3198 if let Err(e) = tube.send(&response) {
3199 error!("failed to send VmMemoryControlResponse: {}", e);
3200 }
3201 }
3202 Err(e) => {
3203 if let TubeError::Disconnected = e {
3204 vm_control_indices_to_remove.push(index);
3205 } else {
3206 error!("failed to recv VmMemoryControlRequest: {}", e);
3207 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07003208 }
3209 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003210 }
3211 TaggedControlTube::VmIrq(tube) => match tube.recv::<VmIrqRequest>() {
Xiong Zhang2515b752019-09-19 10:29:02 +08003212 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07003213 let response = {
3214 let irq_chip = &mut linux.irq_chip;
3215 request.execute(
3216 |setup| match setup {
3217 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003218 if let Some(event_index) = irq_chip
3219 .register_irq_event(irq, ev, None)?
3220 {
3221 match wait_ctx.add(
3222 ev,
3223 Token::IrqFd {
3224 index: event_index
3225 },
3226 ) {
3227 Err(e) => {
3228 warn!("failed to add IrqFd to poll context: {}", e);
3229 Err(e)
3230 },
3231 Ok(_) => {
3232 Ok(())
3233 }
3234 }
3235 } else {
3236 Ok(())
3237 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07003238 }
3239 IrqSetup::Route(route) => irq_chip.route_irq(route),
3240 },
Zach Reiznerdc748482021-04-14 13:59:30 -07003241 &mut sys_allocator,
Steven Richmanf32d0b42020-06-20 21:45:32 -07003242 )
3243 };
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003244 if let Err(e) = tube.send(&response) {
Xiong Zhang2515b752019-09-19 10:29:02 +08003245 error!("failed to send VmIrqResponse: {}", e);
3246 }
3247 }
3248 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003249 if let TubeError::Disconnected = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08003250 vm_control_indices_to_remove.push(index);
3251 } else {
3252 error!("failed to recv VmIrqRequest: {}", e);
3253 }
3254 }
3255 },
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003256 TaggedControlTube::VmMsync(tube) => {
3257 match tube.recv::<VmMsyncRequest>() {
3258 Ok(request) => {
3259 let response = request.execute(&mut linux.vm);
3260 if let Err(e) = tube.send(&response) {
3261 error!("failed to send VmMsyncResponse: {}", e);
3262 }
3263 }
3264 Err(e) => {
3265 if let TubeError::Disconnected = e {
3266 vm_control_indices_to_remove.push(index);
3267 } else {
3268 error!("failed to recv VmMsyncRequest: {}", e);
3269 }
Daniel Verkampe1980a92020-02-07 11:00:55 -08003270 }
3271 }
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003272 }
3273 TaggedControlTube::Fs(tube) => match tube.recv::<FsMappingRequest>() {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003274 Ok(request) => {
3275 let response =
Zach Reiznerdc748482021-04-14 13:59:30 -07003276 request.execute(&mut linux.vm, &mut sys_allocator);
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003277 if let Err(e) = tube.send(&response) {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003278 error!("failed to send VmResponse: {}", e);
3279 }
3280 }
3281 Err(e) => {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003282 if let TubeError::Disconnected = e {
Keiichi Watanabeeefe7fb2020-11-17 17:58:35 +09003283 vm_control_indices_to_remove.push(index);
3284 } else {
3285 error!("failed to recv VmResponse: {}", e);
3286 }
3287 }
3288 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08003289 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003290 }
3291 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003292 }
3293 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003294
Michael Hoylee392c462020-10-07 03:29:24 -07003295 for event in events.iter().filter(|e| e.is_hungup) {
3296 match event.token {
Zach Reiznera60744b2019-02-13 17:33:32 -08003297 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08003298 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08003299 Token::ChildSignal => {}
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07003300 Token::IrqFd { index: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09003301 Token::BalanceMemory => {}
3302 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08003303 Token::VmControlServer => {}
3304 Token::VmControl { index } => {
3305 // It's possible more data is readable and buffered while the socket is hungup,
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003306 // so don't delete the tube from the poll context until we're sure all the
Zach Reiznera60744b2019-02-13 17:33:32 -08003307 // data is read.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003308 if control_tubes
Jakub Starond99cd0a2019-04-11 14:09:39 -07003309 .get(index)
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003310 .map(|s| !s.as_ref().is_packet_ready())
3311 .unwrap_or(false)
Jakub Starond99cd0a2019-04-11 14:09:39 -07003312 {
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003313 vm_control_indices_to_remove.push(index);
Zach Reizner55a9e502018-10-03 10:22:32 -07003314 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07003315 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003316 }
3317 }
Zach Reiznera60744b2019-02-13 17:33:32 -08003318
3319 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08003320 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07003321 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08003322 vm_control_indices_to_remove.dedup();
3323 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07003324 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
3325 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08003326 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07003327 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08003328 // that has already been closed. Because the token associated with that spurious event
3329 // now belongs to a different socket, the control loop will start to interact with
3330 // sockets that might not be ready to use. This can cause incorrect hangup detection or
3331 // blocking on a socket that will never be ready. See also: crbug.com/1019986
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003332 if let Some(socket) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003333 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08003334 }
3335
3336 // This line implicitly drops the socket at `index` when it gets returned by
3337 // `swap_remove`. After this line, the socket at `index` is not the one from
3338 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07003339 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003340 control_tubes.swap_remove(index);
3341 if let Some(tube) = control_tubes.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07003342 wait_ctx
Zach Reiznerd49bcdb2021-01-07 08:30:28 -08003343 .modify(tube, EventType::Read, Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07003344 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08003345 }
3346 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08003347 }
3348
Zach Reiznerdc748482021-04-14 13:59:30 -07003349 kick_all_vcpus(
3350 &vcpu_handles,
3351 linux.irq_chip.as_irq_chip(),
Daniel Verkamp29409802021-02-24 14:46:19 -08003352 VcpuControl::RunState(VmRunMode::Exiting),
Zach Reiznerdc748482021-04-14 13:59:30 -07003353 );
Steven Richman11dc6712020-09-02 15:39:14 -07003354 for (handle, _) in vcpu_handles {
3355 if let Err(e) = handle.join() {
3356 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08003357 }
3358 }
3359
Daniel Verkamp94c35272019-09-12 13:31:30 -07003360 // Explicitly drop the VM structure here to allow the devices to clean up before the
3361 // control sockets are closed when this function exits.
3362 mem::drop(linux);
3363
Zach Reizner19ad1f32019-12-12 18:58:50 -08003364 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08003365 .set_canon_mode()
3366 .expect("failed to restore canonical mode for terminal");
3367
3368 Ok(())
3369}