blob: b226e3379a4db1cb59db64a29768898579795e13 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Charles William Dick0bf8a552019-10-29 15:36:01 +09005use std::cmp::max;
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
David Tolnayfdac5ed2019-03-08 16:56:14 -08007use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -08009use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070011use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070012use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070015#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070016use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090017use std::num::ParseIntError;
Jakub Starond99cd0a2019-04-11 14:09:39 -070018use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080019use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080020use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090021use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070022use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070023use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080024use std::thread;
25use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090026use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080027
David Tolnay41a6f842019-03-01 16:18:44 -080028use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080029
Tomasz Jeznach42644642020-05-20 23:27:59 -070030use acpi_tables::sdt::SDT;
31
Michael Hoyle6b196952020-08-02 20:09:41 -070032use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reizner65b98f12019-11-22 17:34:58 -080033#[cfg(feature = "gpu")]
34use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070035use devices::virtio::{self, Console, VirtioDevice};
Xiong Zhang17b0daf2019-04-23 17:14:50 +080036use devices::{
Steven Richmanf32d0b42020-06-20 21:45:32 -070037 self, HostBackendDeviceProvider, KvmKernelIrqChip, PciDevice, VfioContainer, VfioDevice,
38 VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080039};
Andrew Scull1590e6f2020-03-18 18:00:47 +000040#[cfg(feature = "audio")]
41use devices::{Ac97Backend, Ac97Dev};
Steven Richmanf32d0b42020-06-20 21:45:32 -070042use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
43use hypervisor::{Hypervisor, HypervisorCap, RunnableVcpu, Vcpu, VcpuExit, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070044use minijail::{self, Minijail};
Zach Reiznera60744b2019-02-13 17:33:32 -080045use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080046use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070047use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080048use resources::{Alloc, MmioType, SystemAllocator};
Zach Reizner6a8fdd92019-01-16 14:38:41 -080049use sync::{Condvar, Mutex};
Jakub Starona3411ea2019-04-24 10:55:25 -070050
Michael Hoyle6b196952020-08-02 20:09:41 -070051use base::{
David Tolnay633426a2019-04-12 12:18:35 -070052 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060053 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
Steven Richmanf32d0b42020-06-20 21:45:32 -070054 set_cpu_affinity, signal, validate_raw_fd, warn, EventFd, ExternalMapping, FlockOperation,
55 Killable, MemoryMappingArena, PollContext, PollToken, Protection, ScopedEvent, SignalFd,
56 Terminal, TimerFd, WatchingEvents, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080057};
Jakub Starone7c59052019-04-09 12:31:14 -070058use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070059 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Charles William Dick664cc3c2020-01-10 14:31:52 +090060 BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket,
Steven Richmanf32d0b42020-06-20 21:45:32 -070061 DiskControlResult, IrqSetup, UsbControlSocket, VmControlResponseSocket, VmIrqRequest,
62 VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket,
63 VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, VmMsyncRequest,
64 VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070065};
Dylan Reidec058d62020-07-20 20:21:11 -070066use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080067
Daniel Verkamp50740ce2020-02-28 12:36:56 -080068use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070069use arch::{
70 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VirtioDeviceStub,
71 VmComponents, VmImage,
72};
Sonny Raoed517d12018-02-13 22:09:43 -080073
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080074#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070075use {
76 aarch64::AArch64 as Arch,
77 devices::{IrqChip, IrqChipAArch64 as IrqChipArch},
78 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
79};
Zach Reizner55a9e502018-10-03 10:22:32 -070080#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070081use {
82 devices::{IrqChipX86_64, IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
83 hypervisor::{VcpuX86_64, VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
84 x86_64::X8664arch as Arch,
85};
Zach Reizner39aa26b2017-12-12 18:03:23 -080086
David Tolnay3df35522019-03-11 12:36:30 -070087#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070088#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080089pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070090 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070091 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070092 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -080093 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070094 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080095 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -070096 BlockDeviceNew(base::Error),
97 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080098 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070099 ChownTpmStorage(base::Error),
100 CloneEventFd(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700101 CloneVcpu(base::Error),
102 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000103 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800104 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700105 CreateConsole(arch::serial::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700106 CreateDiskError(disk::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700107 CreateEventFd(base::Error),
108 CreatePollContext(base::Error),
109 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800110 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800111 CreateTapDevice(NetError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700112 CreateTimerFd(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800113 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800114 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700115 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800116 CreateVfioDevice(devices::vfio::VfioError),
Allen Webbf3024c82020-06-19 07:19:48 -0700117 DeviceJail(minijail::Error),
118 DevicePivotRoot(minijail::Error),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800119 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700120 DiskImageLock(base::Error),
121 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900122 FsDeviceNew(virtio::fs::Error),
123 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700124 GetSignalMask(signal::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800125 InputDeviceNew(virtio::InputError),
126 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700127 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700128 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700129 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800130 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800131 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800132 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700133 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800134 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700135 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800136 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800137 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800138 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800139 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900140 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800141 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700142 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700143 PmemDeviceNew(base::Error),
144 PollContextAdd(base::Error),
145 PollContextDelete(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900146 ReadMemAvailable(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700147 RegisterBalloon(arch::DeviceRegistrationError),
148 RegisterBlock(arch::DeviceRegistrationError),
149 RegisterGpu(arch::DeviceRegistrationError),
150 RegisterNet(arch::DeviceRegistrationError),
151 RegisterP9(arch::DeviceRegistrationError),
152 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700153 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700154 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700155 ReserveGpuMemory(base::MmapError),
156 ReserveMemory(base::Error),
157 ReservePmemMemory(base::MmapError),
158 ResetTimerFd(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800159 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700160 RunnableVcpu(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700161 SettingGidMap(minijail::Error),
162 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700163 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700164 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700165 SignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800166 SpawnVcpu(io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700167 TimerFd(base::Error),
168 ValidateRawFd(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800169 VhostNetDeviceNew(virtio::vhost::Error),
170 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700171 VirtioPciDev(base::Error),
172 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800173}
174
David Tolnayc69f9752019-03-01 18:07:56 -0800175impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700176 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800177 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800178 use self::Error::*;
179
David Tolnay3df35522019-03-11 12:36:30 -0700180 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800181 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800182 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700183 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700184 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800185 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700186 AllocatePmemDeviceAddress(e) => {
187 write!(f, "failed to allocate memory for pmem device: {}", e)
188 }
David Tolnayc69f9752019-03-01 18:07:56 -0800189 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
190 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
191 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800192 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800193 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800194 CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700195 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
196 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000197 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800198 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700199 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700200 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800201 CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
202 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
203 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
204 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
205 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
206 CreateTimerFd(e) => write!(f, "failed to create timerfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800207 CreateTpmStorage(p, e) => {
208 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
209 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800210 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700211 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800212 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800213 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
214 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800215 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800216 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800217 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900218 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
219 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700220 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700221 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
222 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800223 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
224 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800225 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800226 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800227 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800228 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700229 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800230 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800231 f,
232 "failed to open android fstab file {}: {}",
233 p.display(),
234 e
235 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700236 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700237 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
238 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800239 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800240 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900241 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800242 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700243 PmemDeviceImageTooBig => {
244 write!(f, "failed to create pmem device: pmem device image too big")
245 }
246 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800247 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
248 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900249 ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800250 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
251 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
252 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
253 RegisterNet(e) => write!(f, "error registering net device: {}", e),
254 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
255 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
256 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
257 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800258 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
259 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700260 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800261 ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
262 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700263 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800264 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900265 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700266 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800267 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
268 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
269 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
270 TimerFd(e) => write!(f, "failed to read timer fd: {}", e),
271 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
272 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
273 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
274 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
275 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800276 }
277 }
278}
279
Allen Webbf3024c82020-06-19 07:19:48 -0700280impl From<minijail::Error> for Error {
281 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800282 Error::IoJail(err)
283 }
284}
285
David Tolnayc69f9752019-03-01 18:07:56 -0800286impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700287
Zach Reizner39aa26b2017-12-12 18:03:23 -0800288type Result<T> = std::result::Result<T, Error>;
289
Jakub Starond99cd0a2019-04-11 14:09:39 -0700290enum TaggedControlSocket {
291 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700292 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800293 VmIrq(VmIrqResponseSocket),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800294 VmMsync(VmMsyncResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700295}
296
297impl AsRef<UnixSeqpacket> for TaggedControlSocket {
298 fn as_ref(&self) -> &UnixSeqpacket {
299 use self::TaggedControlSocket::*;
300 match &self {
Chirantan Ekbote50582532020-01-16 16:49:14 +0900301 Vm(ref socket) => socket.as_ref(),
302 VmMemory(ref socket) => socket.as_ref(),
303 VmIrq(ref socket) => socket.as_ref(),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800304 VmMsync(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700305 }
306 }
307}
308
309impl AsRawFd for TaggedControlSocket {
310 fn as_raw_fd(&self) -> RawFd {
311 self.as_ref().as_raw_fd()
312 }
313}
314
Andrew Walbranf50bab62020-07-07 13:22:53 +0100315fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900316 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900317
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900318 // Safe because this will only modify `buf` and we check the return value.
319 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
320 if res == 0 {
321 // Safe because the kernel guarantees that the struct is fully initialized.
322 let limit = unsafe { buf.assume_init() };
323 Ok(limit.rlim_max)
324 } else {
325 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
326 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900327}
328
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When true, the jail is configured with an empty capability set.
    limit_caps: bool,
    /// When true, seccomp failures are logged and the `.policy` file is always
    /// used, even if a pre-compiled `.bpf` file exists.
    log_failures: bool,
    /// Path of the seccomp policy; its extension is replaced with `bpf` or
    /// `policy` before loading.
    seccomp_policy: &'a Path,
    /// Optional uid map passed to the jail.
    uid_map: Option<&'a str>,
    /// Optional gid map passed to the jail.
    gid_map: Option<&'a str>,
}
336
Zach Reizner44863792019-06-26 14:22:08 -0700337fn create_base_minijail(
338 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800339 r_limit: Option<u64>,
340 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700341) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800342 // All child jails run in a new user namespace without any users mapped,
343 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800344 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800345
346 if let Some(config) = config {
347 j.namespace_pids();
348 j.namespace_user();
349 j.namespace_user_disable_setgroups();
350 if config.limit_caps {
351 // Don't need any capabilities.
352 j.use_caps(0);
353 }
354 if let Some(uid_map) = config.uid_map {
355 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
356 }
357 if let Some(gid_map) = config.gid_map {
358 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
359 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900360 // Run in a new mount namespace.
361 j.namespace_vfs();
362
Matt Delcoc24ad782020-02-14 13:24:36 -0800363 // Run in an empty network namespace.
364 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900365
366 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800367 j.no_new_privs();
368
369 // By default we'll prioritize using the pre-compiled .bpf over the .policy
370 // file (the .bpf is expected to be compiled using "trap" as the failure
371 // behavior instead of the default "kill" behavior).
372 // Refer to the code comment for the "seccomp-log-failures"
373 // command-line parameter for an explanation about why the |log_failures|
374 // flag forces the use of .policy files (and the build-time alternative to
375 // this run-time flag).
376 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
377 if bpf_policy_file.exists() && !config.log_failures {
378 j.parse_seccomp_program(&bpf_policy_file)
379 .map_err(Error::DeviceJail)?;
380 } else {
381 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
382 // which will correctly kill the entire device process if a worker
383 // thread commits a seccomp violation.
384 j.set_seccomp_filter_tsync();
385 if config.log_failures {
386 j.log_seccomp_filter_failures();
387 }
388 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
389 .map_err(Error::DeviceJail)?;
390 }
391 j.use_seccomp_filter();
392 // Don't do init setup.
393 j.run_as_init();
394 }
395
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900396 // Only pivot_root if we are not re-using the current root directory.
397 if root != Path::new("/") {
398 // It's safe to call `namespace_vfs` multiple times.
399 j.namespace_vfs();
400 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
401 }
Matt Delco45caf912019-11-13 08:11:09 -0800402
Matt Delcoc24ad782020-02-14 13:24:36 -0800403 // Most devices don't need to open many fds.
404 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
405 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
406 .map_err(Error::SettingMaxOpenFiles)?;
407
Zach Reizner39aa26b2017-12-12 18:03:23 -0800408 Ok(j)
409}
410
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800411fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700412 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800413 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
414 // A directory for a jailed device's pivot root.
415 let root_path = Path::new(pivot_root);
416 if !root_path.exists() {
417 return Err(Error::PivotRootDoesntExist(pivot_root));
418 }
419 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800420 let config = SandboxConfig {
421 limit_caps: true,
422 log_failures: cfg.seccomp_log_failures,
423 seccomp_policy: &policy_path,
424 uid_map: None,
425 gid_map: None,
426 };
427 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800428 } else {
429 Ok(None)
430 }
431}
432
David Tolnayfd0971d2019-03-04 17:15:57 -0800433type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800434
435fn create_block_device(
436 cfg: &Config,
437 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700438 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800439) -> DeviceResult {
440 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
441 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
442 // Safe because we will validate |raw_fd|.
443 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
444 } else {
445 OpenOptions::new()
446 .read(true)
447 .write(!disk.read_only)
448 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800449 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800450 };
451 // Lock the disk image to prevent other crosvm instances from using it.
452 let lock_op = if disk.read_only {
453 FlockOperation::LockShared
454 } else {
455 FlockOperation::LockExclusive
456 };
457 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
458
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700459 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800460 let dev = virtio::Block::new(
461 disk_file,
462 disk.read_only,
463 disk.sparse,
Daniel Verkamp27672232019-12-06 17:26:55 +1100464 disk.block_size,
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800465 Some(disk_device_socket),
466 )
467 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800468
469 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700470 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800471 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800472 })
473}
474
475fn create_rng_device(cfg: &Config) -> DeviceResult {
476 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
477
478 Ok(VirtioDeviceStub {
479 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800480 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800481 })
482}
483
/// Creates the virtio tpm device and prepares the simulator's storage directory.
///
/// When jailed, storage lives in `/run/vm/tpm.<pid>`: the directory is created,
/// chown'ed to the crosvm user added to the jail, and bind-mounted into the
/// jail. Without a jail, `/tmp/tpm-simulator` is used.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use base::chown;
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = tpm_jail.as_mut() {
        // Create a tmpfs in the device's root directory for tpm
        // simulator storage. The size is 20*1024, or 20 KB.
        let mount_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            mount_flags,
            "size=20480",
        )?;

        let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

        let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
        let storage = PathBuf::from(&tpm_pid_dir);
        fs::create_dir_all(&storage)
            .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;
        let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
        chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
            .map_err(Error::ChownTpmStorage)?;

        jail.mount_bind(&storage, &storage, true)?;
        storage
    } else {
        // Path used inside cros_sdk which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm_dev = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm_dev),
        jail: tpm_jail,
    })
}
532
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800533fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800534 let socket = single_touch_spec
535 .get_path()
536 .into_unix_stream()
537 .map_err(|e| {
538 error!("failed configuring virtio single touch: {:?}", e);
539 e
540 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800541
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800542 let (width, height) = single_touch_spec.get_size();
543 let dev = virtio::new_single_touch(socket, width, height).map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800544 Ok(VirtioDeviceStub {
545 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800546 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800547 })
548}
549
550fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800551 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800552 error!("failed configuring virtio trackpad: {}", e);
553 e
554 })?;
555
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800556 let (width, height) = trackpad_spec.get_size();
557 let dev = virtio::new_trackpad(socket, width, height).map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800558
559 Ok(VirtioDeviceStub {
560 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800561 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800562 })
563}
564
Zach Reizner65b98f12019-11-22 17:34:58 -0800565fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
566 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800567 error!("failed configuring virtio mouse: {}", e);
568 e
569 })?;
570
571 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
572
573 Ok(VirtioDeviceStub {
574 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800575 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800576 })
577}
578
Zach Reizner65b98f12019-11-22 17:34:58 -0800579fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
580 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800581 error!("failed configuring virtio keyboard: {}", e);
582 e
583 })?;
584
585 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
586
587 Ok(VirtioDeviceStub {
588 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800589 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800590 })
591}
592
593fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
594 let dev_file = OpenOptions::new()
595 .read(true)
596 .write(true)
597 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800598 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800599
600 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
601
602 Ok(VirtioDeviceStub {
603 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800604 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800605 })
606}
607
Jakub Staron1f828d72019-04-11 12:49:29 -0700608fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800609 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
610
611 Ok(VirtioDeviceStub {
612 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800613 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800614 })
615}
616
617fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
618 // Safe because we ensure that we get a unique handle to the fd.
619 let tap = unsafe {
620 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
621 .map_err(Error::CreateTapDevice)?
622 };
623
Xiong Zhang773c7072020-03-20 10:39:55 +0800624 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
625 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700626 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800627 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
628 vq_pairs = 1;
629 }
630 let dev = virtio::Net::from(tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800631
632 Ok(VirtioDeviceStub {
633 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800634 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800635 })
636}
637
638fn create_net_device(
639 cfg: &Config,
640 host_ip: Ipv4Addr,
641 netmask: Ipv4Addr,
642 mac_address: MacAddress,
643 mem: &GuestMemory,
644) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800645 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
646 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700647 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800648 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
649 vq_pairs = 1;
650 }
651
David Tolnay2b089fc2019-03-04 15:33:22 -0800652 let dev = if cfg.vhost_net {
653 let dev =
654 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
655 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800656 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800657 } else {
Xiong Zhang773c7072020-03-20 10:39:55 +0800658 let dev = virtio::Net::<Tap>::new(host_ip, netmask, mac_address, vq_pairs)
659 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800660 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800661 };
662
663 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800664 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800665 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800666 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800667 };
668
669 Ok(VirtioDeviceStub {
670 dev,
671 jail: simple_jail(&cfg, policy)?,
672 })
673}
674
/// Creates the virtio GPU device and, when a jail is in use, fills the jail with the
/// mounts the device needs.
///
/// Display backends are tried in the order they are listed: Wayland first (only when
/// `wayland_socket_path` is given), then X, then the stub backend. Under `cfg.sandbox` the
/// Wayland backend is pointed at `/wayland-0`, the location the socket is bind mounted to
/// inside the jail; otherwise it uses the host socket path directly.
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &EventFd,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    // Wayland, when available, takes priority over X and the stub backend.
    let mut display_backends = Vec::new();
    if let Some(socket_path) = wayland_socket_path {
        let effective_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            socket_path.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(effective_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let gpu = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEventFd)?,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
    );

    let mut gpu_jail = simple_jail(&cfg, "gpu_device")?;
    if let Some(jail) = gpu_jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for node in &["/sys/dev/char", "/sys/devices", "/dev/dri"] {
            let node_path = Path::new(node);
            jail.mount_bind(node_path, node_path, false)?;
        }

        // If the ARM specific devices exist on the host, bind mount them in.
        for node in &["/dev/mali0", "/dev/pvr_sync"] {
            let node_path = Path::new(node);
            if node_path.exists() {
                jail.mount_bind(node_path, node_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        for dir in &["/usr/lib", "/usr/lib64", "/lib", "/lib64"] {
            let dir_path = Path::new(dir);
            if dir_path.exists() {
                jail.mount_bind(dir_path, dir_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(host_socket) = wayland_socket_path {
            jail.mount_bind(host_socket, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        jail.mount(proc_path, proc_path, "proc", proc_flags)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail: gpu_jail,
    })
}
783
784fn create_wayland_device(
785 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700786 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800787 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
788) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900789 let wayland_socket_dirs = cfg
790 .wayland_socket_paths
791 .iter()
792 .map(|(_name, path)| path.parent())
793 .collect::<Option<Vec<_>>>()
794 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800795
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900796 let dev = virtio::Wl::new(cfg.wayland_socket_paths.clone(), socket, resource_bridge)
797 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800798
Matt Delco45caf912019-11-13 08:11:09 -0800799 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800800 Some(mut jail) => {
801 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
802 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
803 jail.mount_with_data(
804 Path::new("none"),
805 Path::new("/"),
806 "tmpfs",
807 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
808 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800809 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800810
811 // Bind mount the wayland socket's directory into jail's root. This is necessary since
812 // each new wayland context must open() the socket. If the wayland socket is ever
813 // destroyed and remade in the same host directory, new connections will be possible
814 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900815 for dir in &wayland_socket_dirs {
816 jail.mount_bind(dir, dir, true)?;
817 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800818 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
819
820 Some(jail)
821 }
822 None => None,
823 };
824
825 Ok(VirtioDeviceStub {
826 dev: Box::new(dev),
827 jail,
828 })
829}
830
/// Creates a virtio video device of the given type, connected to the GPU through
/// `resource_bridge`.
///
/// When a jail is in use, the matching crosvm user ("video-decoder" or "video-encoder") is
/// added to it and the DRI render node, `/dev/urandom` and the D-Bus system bus socket are
/// bind mounted on top of a 64MB tmpfs root.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: virtio::resource_bridge::ResourceRequestSocket,
) -> DeviceResult {
    let mut video_jail = simple_jail(&cfg, "video_device")?;
    if let Some(jail) = video_jail.as_mut() {
        let jail_user = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, jail_user)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(typ, Some(resource_bridge));

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail: video_jail,
    })
}
880
/// Creates a video device of type `typ` and appends it to `devs`.
///
/// A fresh resource bridge pair is created for the device: the GPU-side end is appended to
/// `resource_bridges` and the other end is handed to the new video device.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    resource_bridges: &mut Vec<virtio::resource_bridge::ResourceResponseSocket>,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let bridge = virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
    let (video_end, gpu_end) = bridge;
    resource_bridges.push(gpu_end);

    let video_stub = create_video_device(cfg, typ, video_end)?;
    devs.push(video_stub);
    Ok(())
}
894
David Tolnay2b089fc2019-03-04 15:33:22 -0800895fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
896 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
897
898 Ok(VirtioDeviceStub {
899 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800900 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800901 })
902}
903
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900904fn create_fs_device(
905 cfg: &Config,
906 uid_map: &str,
907 gid_map: &str,
908 src: &Path,
909 tag: &str,
910 fs_cfg: virtio::fs::passthrough::Config,
911) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900912 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800913 let j = if cfg.sandbox {
914 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
915 let config = SandboxConfig {
916 limit_caps: false,
917 uid_map: Some(uid_map),
918 gid_map: Some(gid_map),
919 log_failures: cfg.seccomp_log_failures,
920 seccomp_policy: &seccomp_policy,
921 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +0900922 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
923 // We want bind mounts from the parent namespaces to propagate into the fs device's
924 // namespace.
925 jail.set_remount_mode(libc::MS_SLAVE);
926
927 jail
Matt Delcoc24ad782020-02-14 13:24:36 -0800928 } else {
929 create_base_minijail(src, Some(max_open_files), None)?
930 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900931
932 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
933 // when num_queues > 1.
934 let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
935
936 Ok(VirtioDeviceStub {
937 dev: Box::new(dev),
938 jail: Some(j),
939 })
940}
941
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900942fn create_9p_device(
943 cfg: &Config,
944 uid_map: &str,
945 gid_map: &str,
946 src: &Path,
947 tag: &str,
948) -> DeviceResult {
949 let max_open_files = get_max_open_files()?;
950 let (jail, root) = if cfg.sandbox {
951 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
952 let config = SandboxConfig {
953 limit_caps: false,
954 uid_map: Some(uid_map),
955 gid_map: Some(gid_map),
956 log_failures: cfg.seccomp_log_failures,
957 seccomp_policy: &seccomp_policy,
958 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800959
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900960 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
961 // We want bind mounts from the parent namespaces to propagate into the 9p server's
962 // namespace.
963 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +0900964
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900965 // The shared directory becomes the root of the device's file system.
966 let root = Path::new("/");
967 (Some(jail), root)
968 } else {
969 // There's no mount namespace so we tell the server to treat the source directory as the
970 // root.
971 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -0800972 };
973
974 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
975
976 Ok(VirtioDeviceStub {
977 dev: Box::new(dev),
978 jail,
979 })
980}
981
Jakub Starona3411ea2019-04-24 10:55:25 -0700982fn create_pmem_device(
983 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -0700984 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -0700985 resources: &mut SystemAllocator,
986 disk: &DiskOption,
987 index: usize,
Daniel Verkampe1980a92020-02-07 11:00:55 -0800988 pmem_device_socket: VmMsyncRequestSocket,
Jakub Starona3411ea2019-04-24 10:55:25 -0700989) -> DeviceResult {
990 let fd = OpenOptions::new()
991 .read(true)
992 .write(!disk.read_only)
993 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800994 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Jakub Starona3411ea2019-04-24 10:55:25 -0700995
Iliyan Malcheved149862020-04-17 23:57:47 +0000996 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800997 let metadata =
998 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800999 let disk_len = metadata.len();
1000 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1001 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1002 // we just align the size of the file to 2 MiB then access beyond the last page of the
1003 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1004 // padding up to 2 MiB.
1005 let alignment = 2 * 1024 * 1024;
1006 let align_adjust = if disk_len % alignment != 0 {
1007 alignment - (disk_len % alignment)
1008 } else {
1009 0
1010 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001011 disk_len
1012 .checked_add(align_adjust)
1013 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001014 };
1015
1016 let protection = {
1017 if disk.read_only {
1018 Protection::read()
1019 } else {
1020 Protection::read_write()
1021 }
1022 };
1023
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001024 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001025 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001026 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001027
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001028 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1029 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001030 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001031 .map_err(Error::ReservePmemMemory)?;
1032 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001033 };
1034
1035 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001036 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001037 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001038 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001039 Alloc::PmemDevice(index),
1040 format!("pmem_disk_image_{}", index),
1041 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1042 128 * 1024 * 1024, /* 128 MiB */
1043 )
1044 .map_err(Error::AllocatePmemDeviceAddress)?;
1045
Daniel Verkampe1980a92020-02-07 11:00:55 -08001046 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001047 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001048 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001049 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001050 /* read_only = */ disk.read_only,
1051 /* log_dirty_pages = */ false,
1052 )
1053 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001054
Daniel Verkampe1980a92020-02-07 11:00:55 -08001055 let dev = virtio::Pmem::new(
1056 fd,
1057 GuestAddress(mapping_address),
1058 slot,
1059 arena_size,
1060 Some(pmem_device_socket),
1061 )
1062 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001063
1064 Ok(VirtioDeviceStub {
1065 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001066 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001067 })
1068}
1069
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001070fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
1071 let mut keep_fds = Vec::new();
1072 let evt = EventFd::new().map_err(Error::CreateEventFd)?;
1073 let dev = param
1074 .create_serial_device::<Console>(&evt, &mut keep_fds)
1075 .map_err(Error::CreateConsole)?;
1076
Nicholas Verne71e73d82020-07-08 17:19:55 +10001077 let jail = match simple_jail(&cfg, "serial")? {
1078 Some(mut jail) => {
1079 // Create a tmpfs in the device's root directory so that we can bind mount the
1080 // log socket directory into it.
1081 // The size=67108864 is size=64*1024*1024 or size=64MB.
1082 jail.mount_with_data(
1083 Path::new("none"),
1084 Path::new("/"),
1085 "tmpfs",
1086 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1087 "size=67108864",
1088 )?;
1089 add_crosvm_user_to_jail(&mut jail, "serial")?;
1090 let res = param.add_bind_mounts(&mut jail);
1091 if res.is_err() {
1092 error!("failed to add bind mounts for console device");
1093 }
1094 Some(jail)
1095 }
1096 None => None,
1097 };
1098
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001099 Ok(VirtioDeviceStub {
1100 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001101 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001102 })
1103}
1104
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001105// gpu_device_socket is not used when GPU support is disabled.
1106#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001107fn create_virtio_devices(
1108 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -07001109 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001110 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001111 resources: &mut SystemAllocator,
Zach Reizner55a9e502018-10-03 10:22:32 -07001112 _exit_evt: &EventFd,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001113 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001114 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001115 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001116 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001117 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001118 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001119) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001120 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001121
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001122 for (_, param) in cfg
1123 .serial_parameters
1124 .iter()
1125 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1126 {
1127 let dev = create_console_device(cfg, param)?;
1128 devs.push(dev);
1129 }
1130
Zach Reizner8fb52112017-12-13 16:04:39 -08001131 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001132 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -08001133 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001134 }
1135
Jakub Starona3411ea2019-04-24 10:55:25 -07001136 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Daniel Verkampe1980a92020-02-07 11:00:55 -08001137 let pmem_device_socket = pmem_device_sockets.remove(0);
1138 devs.push(create_pmem_device(
1139 cfg,
1140 vm,
1141 resources,
1142 pmem_disk,
1143 index,
1144 pmem_device_socket,
1145 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001146 }
1147
David Tolnay2b089fc2019-03-04 15:33:22 -08001148 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001149
David Tolnayde6b29a2018-12-20 11:49:46 -08001150 #[cfg(feature = "tpm")]
1151 {
David Tolnay43f8e212019-02-13 17:28:16 -08001152 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001153 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001154 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001155 }
1156
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001157 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
1158 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
1159 }
1160
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001161 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -08001162 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001163 }
1164
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001165 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -08001166 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001167 }
1168
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001169 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -08001170 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001171 }
1172
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001173 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -08001174 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001175 }
1176
David Tolnay2b089fc2019-03-04 15:33:22 -08001177 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001178
Zach Reizner39aa26b2017-12-12 18:03:23 -08001179 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001180 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001181 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001182 }
1183
David Tolnay2b089fc2019-03-04 15:33:22 -08001184 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1185 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1186 {
1187 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001188 }
1189
David Tolnayfa701712019-02-13 16:42:54 -08001190 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001191 let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();
1192
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001193 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001194 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
1195 let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;
1196
1197 #[cfg(feature = "gpu")]
1198 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001199 if cfg.gpu_parameters.is_some() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001200 let (wl_socket, gpu_socket) =
1201 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
1202 resource_bridges.push(gpu_socket);
1203 wl_resource_bridge = Some(wl_socket);
1204 }
1205 }
1206
1207 devs.push(create_wayland_device(
1208 cfg,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001209 wayland_device_socket,
1210 wl_resource_bridge,
1211 )?);
1212 }
David Tolnayfa701712019-02-13 16:42:54 -08001213
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001214 #[cfg(feature = "video-decoder")]
1215 {
1216 if cfg.video_dec {
1217 register_video_device(
1218 &mut devs,
1219 &mut resource_bridges,
1220 cfg,
1221 devices::virtio::VideoDeviceType::Decoder,
1222 )?;
1223 }
1224 }
1225
1226 #[cfg(feature = "video-encoder")]
1227 {
1228 if cfg.video_enc {
1229 register_video_device(
1230 &mut devs,
1231 &mut resource_bridges,
1232 cfg,
1233 devices::virtio::VideoDeviceType::Encoder,
1234 )?;
1235 }
1236 }
1237
Zach Reizner3a8100a2017-09-13 19:15:43 -07001238 #[cfg(feature = "gpu")]
1239 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001240 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Zach Reizner65b98f12019-11-22 17:34:58 -08001241 let mut event_devices = Vec::new();
1242 if cfg.display_window_mouse {
1243 let (event_device_socket, virtio_dev_socket) =
1244 UnixStream::pair().map_err(Error::CreateSocket)?;
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001245 let (single_touch_width, single_touch_height) = cfg
1246 .virtio_single_touch
1247 .as_ref()
1248 .map(|single_touch_spec| single_touch_spec.get_size())
Noah Golddc7f52b2020-02-01 13:01:58 -08001249 .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001250 let dev = virtio::new_single_touch(
1251 virtio_dev_socket,
1252 single_touch_width,
1253 single_touch_height,
1254 )
1255 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001256 devs.push(VirtioDeviceStub {
1257 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001258 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001259 });
1260 event_devices.push(EventDevice::touchscreen(event_device_socket));
1261 }
1262 if cfg.display_window_keyboard {
1263 let (event_device_socket, virtio_dev_socket) =
1264 UnixStream::pair().map_err(Error::CreateSocket)?;
1265 let dev = virtio::new_keyboard(virtio_dev_socket).map_err(Error::InputDeviceNew)?;
1266 devs.push(VirtioDeviceStub {
1267 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001268 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001269 });
1270 event_devices.push(EventDevice::keyboard(event_device_socket));
1271 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001272 devs.push(create_gpu_device(
1273 cfg,
1274 _exit_evt,
1275 gpu_device_socket,
1276 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001277 // Use the unnamed socket for GPU display screens.
1278 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001279 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001280 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001281 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001282 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001283 }
1284 }
1285
Zach Reizneraa575662018-08-15 10:46:32 -07001286 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001287 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001288 }
1289
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001290 for shared_dir in &cfg.shared_dirs {
1291 let SharedDir {
1292 src,
1293 tag,
1294 kind,
1295 uid_map,
1296 gid_map,
1297 cfg: fs_cfg,
1298 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001299
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001300 let dev = match kind {
1301 SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001302 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag)?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001303 };
1304 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001305 }
1306
1307 Ok(devs)
1308}
1309
1310fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001311 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001312 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001313 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001314 resources: &mut SystemAllocator,
David Tolnay2b089fc2019-03-04 15:33:22 -08001315 exit_evt: &EventFd,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001316 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001317 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001318 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001319 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001320 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001321 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001322 usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001323 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001324) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001325 let stubs = create_virtio_devices(
1326 &cfg,
1327 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001328 vm,
1329 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001330 exit_evt,
1331 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001332 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001333 balloon_device_socket,
1334 disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001335 pmem_device_sockets,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001336 map_request,
David Tolnay2b089fc2019-03-04 15:33:22 -08001337 )?;
1338
1339 let mut pci_devices = Vec::new();
1340
1341 for stub in stubs {
Daniel Verkampbb712d62019-11-19 09:47:33 -08001342 let (msi_host_socket, msi_device_socket) =
1343 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1344 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1345 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
1346 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001347 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001348 pci_devices.push((dev, stub.jail));
1349 }
1350
Andrew Scull1590e6f2020-03-18 18:00:47 +00001351 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001352 for ac97_param in &cfg.ac97_parameters {
1353 let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
1354 let policy = match ac97_param.backend {
1355 Ac97Backend::CRAS => "cras_audio_device",
1356 Ac97Backend::NULL => "null_audio_device",
1357 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001358
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001359 pci_devices.push((Box::new(dev), simple_jail(&cfg, &policy)?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001360 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001361
Jingkui Wang100e6e42019-03-08 20:41:57 -08001362 // Create xhci controller.
1363 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001364 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001365
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001366 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001367 let vfio_container = Arc::new(Mutex::new(
1368 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1369 ));
1370
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001371 for vfio_path in &cfg.vfio {
1372 // create one Irq and Mem request socket for each vfio device
1373 let (vfio_host_socket_irq, vfio_device_socket_irq) =
1374 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1375 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_irq));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001376
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001377 let (vfio_host_socket_mem, vfio_device_socket_mem) =
1378 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>()
1379 .map_err(Error::CreateSocket)?;
1380 control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001381
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001382 let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
1383 .map_err(Error::CreateVfioDevice)?;
1384 let vfiopcidevice = Box::new(VfioPciDevice::new(
1385 vfiodevice,
1386 vfio_device_socket_irq,
1387 vfio_device_socket_mem,
1388 ));
1389 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1390 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001391 }
1392
David Tolnay2b089fc2019-03-04 15:33:22 -08001393 Ok(pci_devices)
1394}
1395
1396#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001397#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001398struct Ids {
1399 uid: uid_t,
1400 gid: gid_t,
1401}
1402
David Tolnay48c48292019-03-01 16:54:25 -08001403// Set the uid/gid for the jailed process and give a basic id map. This is
1404// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001405fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001406 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1407
1408 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1409 Ok(u) => u,
1410 Err(e) => {
1411 warn!("falling back to current user id for {}: {}", feature, e);
1412 geteuid()
1413 }
1414 };
1415
1416 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1417 Ok(u) => u,
1418 Err(e) => {
1419 warn!("falling back to current group id for {}: {}", feature, e);
1420 getegid()
1421 }
1422 };
1423
1424 jail.change_uid(crosvm_uid);
1425 jail.change_gid(crosvm_gid);
1426 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1427 .map_err(Error::SettingUidMap)?;
1428 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1429 .map_err(Error::SettingGidMap)?;
1430
David Tolnay41a6f842019-03-01 16:18:44 -08001431 Ok(Ids {
1432 uid: crosvm_uid,
1433 gid: crosvm_gid,
1434 })
David Tolnay48c48292019-03-01 16:54:25 -08001435}
1436
David Tolnayfd0971d2019-03-04 17:15:57 -08001437fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001438 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001439 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001440 }
1441 let raw_fd = path
1442 .file_name()
1443 .and_then(|fd_osstr| fd_osstr.to_str())
1444 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1445 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001446 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001447}
1448
Zach Reizner65b98f12019-11-22 17:34:58 -08001449trait IntoUnixStream {
1450 fn into_unix_stream(self) -> Result<UnixStream>;
1451}
1452
1453impl<'a> IntoUnixStream for &'a Path {
1454 fn into_unix_stream(self) -> Result<UnixStream> {
1455 if self.parent() == Some(Path::new("/proc/self/fd")) {
1456 // Safe because we will validate |raw_fd|.
1457 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(self)?)) }
1458 } else {
1459 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1460 }
1461 }
1462}
1463impl<'a> IntoUnixStream for &'a PathBuf {
1464 fn into_unix_stream(self) -> Result<UnixStream> {
1465 self.as_path().into_unix_stream()
1466 }
1467}
1468
1469impl IntoUnixStream for UnixStream {
1470 fn into_unix_stream(self) -> Result<UnixStream> {
1471 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001472 }
1473}
1474
Steven Richmanf32d0b42020-06-20 21:45:32 -07001475fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1476 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001477 unsafe {
1478 extern "C" fn handle_signal() {}
1479 // Our signal handler does nothing and is trivially async signal safe.
1480 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1481 .map_err(Error::RegisterSignalHandler)?;
1482 }
1483 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1484 } else {
1485 unsafe {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001486 extern "C" fn handle_signal<T: Vcpu>() {
1487 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001488 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001489 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001490 .map_err(Error::RegisterSignalHandler)?;
1491 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001492 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001493 Ok(())
1494}
1495
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001496#[derive(Default)]
1497struct VcpuRunMode {
1498 mtx: Mutex<VmRunMode>,
1499 cvar: Condvar,
1500}
1501
1502impl VcpuRunMode {
1503 fn set_and_notify(&self, new_mode: VmRunMode) {
1504 *self.mtx.lock() = new_mode;
1505 self.cvar.notify_all();
1506 }
1507}
1508
Steven Richmanf32d0b42020-06-20 21:45:32 -07001509// Sets up a vcpu and converts it into a runnable vcpu.
1510fn runnable_vcpu<V, R>(
1511 cpu_id: usize,
1512 vcpu: Option<V>,
1513 vm: impl VmArch<Vcpu = V>,
1514 irq_chip: &mut impl IrqChipArch<V>,
1515 vcpu_count: usize,
1516 vcpu_affinity: Vec<usize>,
1517 has_bios: bool,
1518 use_hypervisor_signals: bool,
1519) -> Result<R>
1520where
1521 V: VcpuArch<Runnable = R>,
1522 R: RunnableVcpu<Vcpu = V>,
1523{
1524 let mut vcpu = if let Some(v) = vcpu {
1525 v
1526 } else {
1527 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from the
1528 // vcpu thread.
1529 vm.create_vcpu(cpu_id).map_err(Error::CreateVcpu)?
1530 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001531
Steven Richmanf32d0b42020-06-20 21:45:32 -07001532 irq_chip
1533 .add_vcpu(cpu_id, vcpu.try_clone().map_err(Error::CloneVcpu)?)
1534 .map_err(Error::AddIrqChipVcpu)?;
1535
1536 Arch::configure_vcpu(
1537 vm.get_memory(),
1538 vm.get_hypervisor(),
1539 irq_chip,
1540 &mut vcpu,
1541 cpu_id,
1542 vcpu_count,
1543 has_bios,
1544 )
1545 .map_err(Error::ConfigureVcpu)?;
1546
1547 if !vcpu_affinity.is_empty() {
1548 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1549 error!("Failed to set CPU affinity: {}", e);
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001550 }
1551 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001552
1553 #[cfg(feature = "chromeos")]
1554 if let Err(e) = base::sched::enable_core_scheduling() {
1555 error!("Failed to enable core scheduling: {}", e);
1556 }
1557
1558 if use_hypervisor_signals {
1559 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1560 v.retain(|&x| x != SIGRTMIN() + 0);
1561 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1562 }
1563
1564 vcpu.to_runnable(Some(SIGRTMIN() + 0))
1565 .map_err(Error::RunnableVcpu)
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001566}
1567
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001568#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -07001569fn inject_interrupt<T: VcpuX86_64>(
1570 irq_chip: &mut impl IrqChipX86_64<T>,
1571 vcpu: &impl VcpuX86_64,
1572 vcpu_id: usize,
1573) {
1574 if !irq_chip.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
1575 return;
1576 }
1577
1578 let vector = irq_chip
1579 .get_external_interrupt(vcpu_id)
1580 .unwrap_or_else(|e| {
1581 error!("get_external_interrupt failed on vcpu {}: {}", vcpu_id, e);
1582 None
1583 });
1584 if let Some(vector) = vector {
1585 if let Err(e) = vcpu.interrupt(vector as u32) {
1586 error!(
1587 "Failed to inject interrupt {} to vcpu {}: {}",
1588 vector, vcpu_id, e
1589 );
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001590 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001591 }
1592
1593 // The second interrupt request should be handled immediately, so ask vCPU to exit as soon as
1594 // possible.
1595 if irq_chip.interrupt_requested(vcpu_id) {
1596 vcpu.request_interrupt_window();
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001597 }
1598}
1599
// Counterpart of the x86 `inject_interrupt` for arm/aarch64 builds; it does nothing.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
fn inject_interrupt<T: Vcpu>(
    _irq_chip: &mut impl IrqChip<T>,
    _vcpu: &impl Vcpu,
    _vcpu_id: usize,
) {
}
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001602
Steven Richmanf32d0b42020-06-20 21:45:32 -07001603fn run_vcpu<V, R>(
1604 cpu_id: usize,
1605 vcpu: Option<V>,
1606 vm: impl VmArch<Vcpu = V> + 'static,
1607 mut irq_chip: impl IrqChipArch<V> + 'static,
1608 vcpu_count: usize,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001609 vcpu_affinity: Vec<usize>,
Zach Reizner55a9e502018-10-03 10:22:32 -07001610 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001611 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001612 io_bus: devices::Bus,
1613 mmio_bus: devices::Bus,
1614 exit_evt: EventFd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001615 requires_pvclock_ctrl: bool,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001616 run_mode_arc: Arc<VcpuRunMode>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001617 use_hypervisor_signals: bool,
1618) -> Result<JoinHandle<()>>
1619where
1620 V: VcpuArch<Runnable = R> + 'static,
1621 R: RunnableVcpu<Vcpu = V>,
1622{
Zach Reizner8fb52112017-12-13 16:04:39 -08001623 thread::Builder::new()
1624 .name(format!("crosvm_vcpu{}", cpu_id))
1625 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08001626 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
1627 // implementation accomplishes that.
1628 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
1629
Steven Richmanf32d0b42020-06-20 21:45:32 -07001630 let vcpu = runnable_vcpu(
1631 cpu_id,
1632 vcpu,
1633 vm,
1634 &mut irq_chip,
1635 vcpu_count,
1636 vcpu_affinity,
1637 has_bios,
1638 use_hypervisor_signals,
1639 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001640
Zach Reizner8fb52112017-12-13 16:04:39 -08001641 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001642
Steven Richmanf32d0b42020-06-20 21:45:32 -07001643 let vcpu = match vcpu {
1644 Ok(v) => v,
1645 Err(e) => {
1646 error!("failed to start vcpu {}: {}", cpu_id, e);
1647 return;
1648 }
1649 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001650
Steven Richmanf32d0b42020-06-20 21:45:32 -07001651 loop {
1652 let mut interrupted_by_signal = false;
1653 match vcpu.run() {
1654 Ok(VcpuExit::IoIn { port, mut size }) => {
1655 let mut data = [0; 8];
1656 if size > data.len() {
1657 error!("unsupported IoIn size of {} bytes", size);
1658 size = data.len();
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001659 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001660 io_bus.read(port as u64, &mut data[..size]);
1661 if let Err(e) = vcpu.set_data(&data[..size]) {
1662 error!("failed to set return data for IoIn: {}", e);
1663 }
1664 }
1665 Ok(VcpuExit::IoOut {
1666 port,
1667 mut size,
1668 data,
1669 }) => {
1670 if size > data.len() {
1671 error!("unsupported IoOut size of {} bytes", size);
1672 size = data.len();
1673 }
1674 io_bus.write(port as u64, &data[..size]);
1675 }
1676 Ok(VcpuExit::MmioRead { address, size }) => {
1677 let mut data = [0; 8];
1678 mmio_bus.read(address, &mut data[..size]);
1679 // Setting data for mmio can not fail.
1680 let _ = vcpu.set_data(&data[..size]);
1681 }
1682 Ok(VcpuExit::MmioWrite {
1683 address,
1684 size,
1685 data,
1686 }) => {
1687 mmio_bus.write(address, &data[..size]);
1688 }
1689 Ok(VcpuExit::IoapicEoi { vector }) => {
1690 if let Err(e) = irq_chip.broadcast_eoi(vector) {
1691 error!(
1692 "failed to broadcast eoi {} on vcpu {}: {}",
1693 vector, cpu_id, e
1694 );
1695 }
1696 }
1697 Ok(VcpuExit::Hlt) => break,
1698 Ok(VcpuExit::Shutdown) => break,
1699 Ok(VcpuExit::FailEntry {
1700 hardware_entry_failure_reason,
1701 }) => {
1702 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
1703 break;
1704 }
1705 Ok(VcpuExit::SystemEvent(_, _)) => break,
1706 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1707 Err(e) => match e.errno() {
1708 libc::EINTR => interrupted_by_signal = true,
1709 libc::EAGAIN => {}
1710 _ => {
1711 error!("vcpu hit unknown error: {}", e);
1712 break;
1713 }
1714 },
1715 }
1716
1717 if interrupted_by_signal {
1718 if use_hypervisor_signals {
1719 // Try to clear the signal that we use to kick VCPU if it is pending before
1720 // attempting to handle pause requests.
1721 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
1722 error!("failed to clear pending signal: {}", e);
1723 break;
1724 }
1725 } else {
1726 vcpu.set_immediate_exit(false);
1727 }
1728 let mut run_mode_lock = run_mode_arc.mtx.lock();
1729 loop {
1730 match *run_mode_lock {
1731 VmRunMode::Running => break,
1732 VmRunMode::Suspending => {
1733 // On KVM implementations that use a paravirtualized clock (e.g.
1734 // x86), a flag must be set to indicate to the guest kernel that a
1735 // VCPU was suspended. The guest kernel will use this flag to
1736 // prevent the soft lockup detection from triggering when this VCPU
1737 // resumes, which could happen days later in realtime.
1738 if requires_pvclock_ctrl {
1739 if let Err(e) = vcpu.pvclock_ctrl() {
1740 error!(
1741 "failed to tell hypervisor vcpu {} is suspending: {}",
1742 cpu_id, e
1743 );
Zach Reizner795355a2019-01-16 17:37:57 -08001744 }
1745 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001746 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001747 VmRunMode::Exiting => return,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001748 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001749 // Give ownership of our exclusive lock to the condition variable that will
1750 // block. When the condition variable is notified, `wait` will unblock and
1751 // return a new exclusive lock.
1752 run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001753 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001754 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001755
1756 inject_interrupt(&mut irq_chip, vcpu.deref(), cpu_id);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001757 }
David Tolnay2bac1e72018-12-12 14:33:42 -08001758 })
1759 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001760}
1761
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of i64s.
// Returns an error if any of the fields fail to parse.
//
// The whole file is read, so long contents are never cut off in the middle of a field. Contents
// that are not valid UTF-8 and fields that are not valid integers are both reported as
// `io::ErrorKind::InvalidData`. A file containing only whitespace yields an empty Vec.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    let mut content = String::new();
    File::open(path)?.read_to_string(&mut content)?;

    // `split_whitespace` already skips leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1781
1782// Reads the contents of a file and converts them into a u64, and if there
1783// are multiple fields it only returns the first one.
Charles William Dick0bf8a552019-10-29 15:36:01 +09001784fn file_to_i64<P: AsRef<Path>>(path: P) -> io::Result<i64> {
1785 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07001786 .into_iter()
1787 .next()
1788 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001789}
1790
Steven Richmanf32d0b42020-06-20 21:45:32 -07001791fn create_kvm(mem: GuestMemory) -> base::Result<KvmVm> {
1792 let kvm = Kvm::new()?;
1793 let vm = KvmVm::new(&kvm, mem)?;
1794 Ok(vm)
1795}
1796
1797fn create_kvm_kernel_irq_chip(
1798 vm: &KvmVm,
1799 vcpu_count: usize,
1800 _ioapic_device_socket: VmIrqRequestSocket,
1801) -> base::Result<impl IrqChipArch<KvmVcpu>> {
1802 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
1803 Ok(irq_chip)
1804}
1805
1806#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1807fn create_kvm_split_irq_chip(
1808 vm: &KvmVm,
1809 vcpu_count: usize,
1810 ioapic_device_socket: VmIrqRequestSocket,
1811) -> base::Result<impl IrqChipArch<KvmVcpu>> {
1812 let irq_chip = KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_socket)?;
1813 Ok(irq_chip)
1814}
1815
Dylan Reid059a1882018-07-23 17:58:09 -07001816pub fn run_config(cfg: Config) -> Result<()> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001817 if cfg.split_irqchip {
1818 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1819 {
1820 unimplemented!("KVM split irqchip mode only supported on x86 processors")
1821 }
1822
1823 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1824 {
1825 run_vm(cfg, create_kvm, create_kvm_split_irq_chip)
1826 }
1827 } else {
1828 run_vm(cfg, create_kvm, create_kvm_kernel_irq_chip)
1829 }
1830}
1831
1832fn run_vm<V, I, FV, FI>(cfg: Config, create_vm: FV, create_irq_chip: FI) -> Result<()>
1833where
1834 V: VmArch + 'static,
1835 I: IrqChipArch<V::Vcpu> + 'static,
1836 FV: FnOnce(GuestMemory) -> base::Result<V>,
1837 FI: FnOnce(
1838 &V,
1839 usize, // vcpu_count
1840 VmIrqRequestSocket, // ioapic_device_socket
1841 ) -> base::Result<I>,
1842{
Lepton Wu9105e9f2019-03-14 11:38:31 -07001843 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001844 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1845 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1846 // access to those files will not be possible.
1847 info!("crosvm entering multiprocess mode");
1848 }
1849
Jingkui Wang100e6e42019-03-08 20:41:57 -08001850 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001851 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001852 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1853 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1854 // quickly.
1855 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1856
David Tolnay2b089fc2019-03-04 15:33:22 -08001857 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1858 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001859 } else {
1860 None
1861 };
1862
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001863 let vm_image = match cfg.executable_path {
1864 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1865 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
1866 ),
1867 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1868 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
1869 ),
1870 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1871 };
1872
Dylan Reid059a1882018-07-23 17:58:09 -07001873 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08001874 memory_size: cfg
1875 .memory
1876 .unwrap_or(256)
1877 .checked_mul(1024 * 1024)
1878 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07001879 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001880 vcpu_affinity: cfg.vcpu_affinity.clone(),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001881 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001882 android_fstab: cfg
1883 .android_fstab
1884 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001885 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001886 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09001887 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001888 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001889 extra_kernel_params: cfg.params.clone(),
1890 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07001891 acpi_sdts: cfg
1892 .acpi_tables
1893 .iter()
1894 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
1895 .collect::<Result<Vec<SDT>>>()?,
Dylan Reid059a1882018-07-23 17:58:09 -07001896 };
1897
Zach Reiznera60744b2019-02-13 17:33:32 -08001898 let control_server_socket = match &cfg.socket_path {
1899 Some(path) => Some(UnlinkUnixSeqpacketListener(
1900 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1901 )),
1902 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001903 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001904
1905 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001906 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07001907 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1908 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001909 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001910 let (balloon_host_socket, balloon_device_socket) =
Charles William Dick664cc3c2020-01-10 14:31:52 +09001911 msg_socket::pair::<BalloonControlCommand, BalloonControlResult>()
1912 .map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001913
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001914 // Create one control socket per disk.
1915 let mut disk_device_sockets = Vec::new();
1916 let mut disk_host_sockets = Vec::new();
1917 let disk_count = cfg.disks.len();
1918 for _ in 0..disk_count {
1919 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07001920 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
1921 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001922 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07001923 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001924 }
1925
Daniel Verkampe1980a92020-02-07 11:00:55 -08001926 let mut pmem_device_sockets = Vec::new();
1927 let pmem_count = cfg.pmem_devices.len();
1928 for _ in 0..pmem_count {
1929 let (pmem_host_socket, pmem_device_socket) =
1930 msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
1931 pmem_device_sockets.push(pmem_device_socket);
1932 control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
1933 }
1934
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001935 let (gpu_host_socket, gpu_device_socket) =
1936 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1937 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
1938
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08001939 let (ioapic_host_socket, ioapic_device_socket) =
1940 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1941 control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket));
1942
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001943 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
1944
Trent Begin17ccaad2019-04-17 13:51:25 -06001945 let linux = Arch::build_vm(
1946 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06001947 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001948 simple_jail(&cfg, "serial")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001949 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06001950 create_devices(
1951 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001952 mem,
1953 vm,
1954 sys_allocator,
1955 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001956 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06001957 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001958 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06001959 balloon_device_socket,
1960 &mut disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001961 &mut pmem_device_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06001962 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001963 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06001964 )
1965 },
Steven Richmanf32d0b42020-06-20 21:45:32 -07001966 create_vm,
1967 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_socket),
Trent Begin17ccaad2019-04-17 13:51:25 -06001968 )
David Tolnaybe034262019-03-04 17:48:36 -08001969 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08001970
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001971 run_control(
1972 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08001973 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001974 control_sockets,
1975 balloon_host_socket,
1976 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001977 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001978 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001979 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001980 Arc::clone(&map_request),
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001981 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001982}
1983
Steven Richmanf32d0b42020-06-20 21:45:32 -07001984fn run_control<V: VmArch + 'static, I: IrqChipArch<V::Vcpu> + 'static>(
1985 mut linux: RunnableLinuxVm<V, I>,
Zach Reiznera60744b2019-02-13 17:33:32 -08001986 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07001987 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07001988 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001989 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08001990 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07001991 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07001992 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001993 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reizner55a9e502018-10-03 10:22:32 -07001994) -> Result<()> {
David Tolnay5bbbf612018-12-01 17:49:30 -08001995 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001996
Zach Reizner5bed0d22018-03-28 02:31:11 -07001997 #[derive(PollToken)]
1998 enum Token {
1999 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002000 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002001 ChildSignal,
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002002 IrqFd { gsi: usize },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002003 BalanceMemory,
2004 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002005 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002006 VmControl { index: usize },
2007 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002008
Zach Reizner19ad1f32019-12-12 18:58:50 -08002009 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002010 .set_raw_mode()
2011 .expect("failed to set terminal raw mode");
2012
Zach Reiznerb2110be2019-07-23 15:55:03 -07002013 let poll_ctx = PollContext::build_with(&[
2014 (&linux.exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002015 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002016 (&sigchld_fd, Token::ChildSignal),
2017 ])
2018 .map_err(Error::PollContextAdd)?;
2019
Zach Reiznera60744b2019-02-13 17:33:32 -08002020 if let Some(socket_server) = &control_server_socket {
2021 poll_ctx
2022 .add(socket_server, Token::VmControlServer)
2023 .map_err(Error::PollContextAdd)?;
2024 }
Dylan Reid059a1882018-07-23 17:58:09 -07002025 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07002026 poll_ctx
2027 .add(socket.as_ref(), Token::VmControl { index })
2028 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002029 }
2030
Steven Richmanf32d0b42020-06-20 21:45:32 -07002031 let events = linux
2032 .irq_chip
2033 .irq_event_tokens()
2034 .map_err(Error::PollContextAdd)?;
2035
2036 for (gsi, evt) in events {
2037 poll_ctx
2038 .add(&evt, Token::IrqFd { gsi: gsi as usize })
2039 .map_err(Error::PollContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002040 }
2041
Charles William Dick0bf8a552019-10-29 15:36:01 +09002042 // Balance available memory between guest and host every second.
Dylan Reid1160f452020-04-22 17:26:00 +00002043 let balancemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002044 if Path::new(LOWMEM_AVAILABLE).exists() {
2045 // Create timer request balloon stats every 1s.
2046 poll_ctx
2047 .add(&balancemem_timer, Token::BalanceMemory)
2048 .map_err(Error::PollContextAdd)?;
2049 let balancemem_dur = Duration::from_secs(1);
2050 let balancemem_int = Duration::from_secs(1);
2051 balancemem_timer
2052 .reset(balancemem_dur, Some(balancemem_int))
2053 .map_err(Error::ResetTimerFd)?;
2054
2055 // Listen for balloon statistics from the guest so we can balance.
2056 poll_ctx
2057 .add(&balloon_host_socket, Token::BalloonResult)
2058 .map_err(Error::PollContextAdd)?;
2059 } else {
2060 warn!("Unable to open low mem available, maybe not a chrome os kernel");
2061 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002062
Lepton Wu20333e42019-03-14 10:48:03 -07002063 if sandbox {
2064 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2065 drop_capabilities().map_err(Error::DropCapabilities)?;
2066 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002067
Steven Richmanf32d0b42020-06-20 21:45:32 -07002068 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2069 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002070 let run_mode_arc = Arc::new(VcpuRunMode::default());
Steven Richmanf32d0b42020-06-20 21:45:32 -07002071 let use_hypervisor_signals = !linux
2072 .vm
2073 .get_hypervisor()
2074 .check_capability(&HypervisorCap::ImmediateExit);
2075 setup_vcpu_signal_handler::<V::Vcpu>(use_hypervisor_signals)?;
2076
2077 let vcpus: Vec<Option<V::Vcpu>> = match linux.vcpus.take() {
2078 Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
2079 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2080 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002081 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07002082 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002083 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002084 vcpu,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002085 linux.vm.try_clone().map_err(Error::CloneEventFd)?,
2086 linux.irq_chip.try_clone().map_err(Error::CloneEventFd)?,
2087 linux.vcpu_count,
Daniel Verkamp107edb32019-04-05 09:58:48 -07002088 linux.vcpu_affinity.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07002089 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002090 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002091 linux.io_bus.clone(),
2092 linux.mmio_bus.clone(),
2093 linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002094 linux.vm.check_capability(VmCap::PvClockSuspend),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002095 run_mode_arc.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002096 use_hypervisor_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07002097 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07002098 vcpu_handles.push(handle);
2099 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002100
Dylan Reid059a1882018-07-23 17:58:09 -07002101 vcpu_thread_barrier.wait();
2102
Zach Reizner39aa26b2017-12-12 18:03:23 -08002103 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002104 let events = {
2105 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002106 Ok(v) => v,
2107 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002108 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002109 break;
2110 }
2111 }
2112 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002113
Steven Richmanf32d0b42020-06-20 21:45:32 -07002114 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2115 warn!("can't deliver delayed irqs: {}", e);
2116 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002117
Zach Reiznera60744b2019-02-13 17:33:32 -08002118 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07002119 for event in events.iter_readable() {
2120 match event.token() {
2121 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002122 info!("vcpu requested shutdown");
2123 break 'poll;
2124 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002125 Token::Suspend => {
2126 info!("VM requested suspend");
2127 linux.suspend_evt.read().unwrap();
2128 run_mode_arc.set_and_notify(VmRunMode::Suspending);
2129 for handle in &vcpu_handles {
2130 let _ = handle.kill(SIGRTMIN() + 0);
2131 }
2132 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002133 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002134 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002135 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002136 let pid = siginfo.ssi_pid;
2137 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2138 Some(label) => format!("{} (pid {})", label, pid),
2139 None => format!("pid {}", pid),
2140 };
David Tolnayf5032762018-12-03 10:46:45 -08002141 error!(
2142 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002143 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002144 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002145 }
David Tolnayf5032762018-12-03 10:46:45 -08002146 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002147 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002148 Token::IrqFd { gsi } => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002149 if let Err(e) = linux.irq_chip.service_irq_event(gsi as u32) {
2150 error!("failed to signal irq {}: {}", gsi, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002151 }
2152 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002153 Token::BalanceMemory => {
2154 balancemem_timer.wait().map_err(Error::TimerFd)?;
2155 let command = BalloonControlCommand::Stats {};
2156 if let Err(e) = balloon_host_socket.send(&command) {
2157 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002158 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002159 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002160 Token::BalloonResult => {
2161 match balloon_host_socket.recv() {
2162 Ok(BalloonControlResult::Stats {
2163 stats,
2164 balloon_actual: balloon_actual_u,
2165 }) => {
2166 // Available memory is reported in MB, and we need bytes.
2167 let host_available = file_to_i64(LOWMEM_AVAILABLE)
2168 .map_err(Error::ReadMemAvailable)?
2169 << 20;
2170 let guest_available_u = if let Some(available) = stats.available_memory
2171 {
2172 available
2173 } else {
2174 warn!("guest available_memory stat is missing");
2175 continue;
2176 };
2177 if guest_available_u > i64::max_value() as u64 {
2178 warn!("guest available memory is too large");
2179 continue;
2180 }
2181 if balloon_actual_u > i64::max_value() as u64 {
2182 warn!("actual balloon size is too large");
2183 continue;
2184 }
2185 // Guest and host available memory is balanced equally.
2186 const GUEST_SHARE: i64 = 1;
2187 const HOST_SHARE: i64 = 1;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002188 // Tell the guest to change the balloon size if the target balloon size
2189 // is more than 5% different from the current balloon size.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002190 const RESIZE_PERCENT: i64 = 5;
2191 let balloon_actual = balloon_actual_u as i64;
2192 let guest_available = guest_available_u as i64;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002193 // Compute how much memory the guest should have available after we
2194 // rebalance.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002195 let guest_available_target = (GUEST_SHARE
2196 * (guest_available + host_available))
2197 / (GUEST_SHARE + HOST_SHARE);
2198 let guest_available_delta = guest_available_target - guest_available;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002199 // How much do we have to change the balloon to balance.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002200 let balloon_target = max(balloon_actual - guest_available_delta, 0);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002201 // Compute the change in balloon size in percent. If the balloon size
2202 // is 0, use 1 so we don't overflow from the infinity % increase.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002203 let balloon_change_percent = (balloon_actual - balloon_target).abs()
2204 * 100
2205 / max(balloon_actual, 1);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002206
Charles William Dick0bf8a552019-10-29 15:36:01 +09002207 if balloon_change_percent >= RESIZE_PERCENT {
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002208 info!("resizing balloon: host avail {}, guest avail {} (target {}), balloon actual {} (target {})",
2209 host_available,
2210 guest_available,
2211 guest_available_target,
2212 balloon_actual,
2213 balloon_target,
2214 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09002215 let command = BalloonControlCommand::Adjust {
2216 num_bytes: balloon_target as u64,
2217 };
2218 if let Err(e) = balloon_host_socket.send(&command) {
2219 warn!("failed to send memory value to balloon device: {}", e);
2220 }
2221 }
2222 }
2223 Err(e) => {
2224 error!("failed to recv BalloonControlResult: {}", e);
2225 }
2226 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002227 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002228 Token::VmControlServer => {
2229 if let Some(socket_server) = &control_server_socket {
2230 match socket_server.accept() {
2231 Ok(socket) => {
2232 poll_ctx
2233 .add(
2234 &socket,
2235 Token::VmControl {
2236 index: control_sockets.len(),
2237 },
2238 )
2239 .map_err(Error::PollContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002240 control_sockets
2241 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002242 }
2243 Err(e) => error!("failed to accept socket: {}", e),
2244 }
2245 }
2246 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002247 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08002248 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002249 match socket {
2250 TaggedControlSocket::Vm(socket) => match socket.recv() {
2251 Ok(request) => {
2252 let mut run_mode_opt = None;
2253 let response = request.execute(
2254 &mut run_mode_opt,
2255 &balloon_host_socket,
2256 disk_host_sockets,
2257 &usb_control_socket,
2258 );
2259 if let Err(e) = socket.send(&response) {
2260 error!("failed to send VmResponse: {}", e);
2261 }
2262 if let Some(run_mode) = run_mode_opt {
2263 info!("control socket changed run mode to {}", run_mode);
2264 match run_mode {
2265 VmRunMode::Exiting => {
2266 break 'poll;
2267 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002268 VmRunMode::Running => {
2269 if let VmRunMode::Suspending =
2270 *run_mode_arc.mtx.lock()
2271 {
2272 linux.io_bus.notify_resume();
2273 }
2274 run_mode_arc.set_and_notify(VmRunMode::Running);
2275 for handle in &vcpu_handles {
2276 let _ = handle.kill(SIGRTMIN() + 0);
2277 }
2278 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002279 other => {
2280 run_mode_arc.set_and_notify(other);
2281 for handle in &vcpu_handles {
2282 let _ = handle.kill(SIGRTMIN() + 0);
2283 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002284 }
2285 }
2286 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002287 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002288 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002289 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002290 vm_control_indices_to_remove.push(index);
2291 } else {
2292 error!("failed to recv VmRequest: {}", e);
2293 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002294 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002295 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07002296 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002297 Ok(request) => {
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002298 let response = request.execute(
2299 &mut linux.vm,
2300 &mut linux.resources,
2301 Arc::clone(&map_request),
2302 );
Jakub Starond99cd0a2019-04-11 14:09:39 -07002303 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002304 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002305 }
2306 }
2307 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002308 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002309 vm_control_indices_to_remove.push(index);
2310 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002311 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002312 }
2313 }
2314 },
Xiong Zhang2515b752019-09-19 10:29:02 +08002315 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
2316 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002317 let response = {
2318 let irq_chip = &mut linux.irq_chip;
2319 request.execute(
2320 |setup| match setup {
2321 IrqSetup::Event(irq, ev) => {
2322 irq_chip.register_irq_event(irq, ev, None)
2323 }
2324 IrqSetup::Route(route) => irq_chip.route_irq(route),
2325 },
2326 &mut linux.resources,
2327 )
2328 };
Xiong Zhang2515b752019-09-19 10:29:02 +08002329 if let Err(e) = socket.send(&response) {
2330 error!("failed to send VmIrqResponse: {}", e);
2331 }
2332 }
2333 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002334 if let MsgError::RecvZero = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002335 vm_control_indices_to_remove.push(index);
2336 } else {
2337 error!("failed to recv VmIrqRequest: {}", e);
2338 }
2339 }
2340 },
Daniel Verkampe1980a92020-02-07 11:00:55 -08002341 TaggedControlSocket::VmMsync(socket) => match socket.recv() {
2342 Ok(request) => {
2343 let response = request.execute(&mut linux.vm);
2344 if let Err(e) = socket.send(&response) {
2345 error!("failed to send VmMsyncResponse: {}", e);
2346 }
2347 }
2348 Err(e) => {
2349 if let MsgError::BadRecvSize { actual: 0, .. } = e {
2350 vm_control_indices_to_remove.push(index);
2351 } else {
2352 error!("failed to recv VmMsyncRequest: {}", e);
2353 }
2354 }
2355 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002356 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002357 }
2358 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002359 }
2360 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002361
Zach Reizner5bed0d22018-03-28 02:31:11 -07002362 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08002363 match event.token() {
2364 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002365 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002366 Token::ChildSignal => {}
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002367 Token::IrqFd { gsi: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09002368 Token::BalanceMemory => {}
2369 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002370 Token::VmControlServer => {}
2371 Token::VmControl { index } => {
2372 // It's possible more data is readable and buffered while the socket is hungup,
2373 // so don't delete the socket from the poll context until we're sure all the
2374 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07002375 match control_sockets
2376 .get(index)
2377 .map(|s| s.as_ref().get_readable_bytes())
2378 {
Zach Reiznera60744b2019-02-13 17:33:32 -08002379 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
2380 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
2381 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07002382 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002383 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002384 }
2385 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002386
2387 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002388 // preserves correct indexes as each element is removed.
Zach Reiznera60744b2019-02-13 17:33:32 -08002389 vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
2390 vm_control_indices_to_remove.dedup();
2391 for index in vm_control_indices_to_remove {
Zide Chen89584072019-11-14 10:33:51 -08002392 // Delete the socket from the `poll_ctx` synchronously. Otherwise, the kernel will do
2393 // this automatically when the FD inserted into the `poll_ctx` is closed after this
2394 // if-block, but this removal can be deferred unpredictably. In some instances where the
2395 // system is under heavy load, we can even get events returned by `poll_ctx` for an FD
2396 // that has already been closed. Because the token associated with that spurious event
2397 // now belongs to a different socket, the control loop will start to interact with
2398 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2399 // blocking on a socket that will never be ready. See also: crbug.com/1019986
2400 if let Some(socket) = control_sockets.get(index) {
2401 poll_ctx.delete(socket).map_err(Error::PollContextDelete)?;
2402 }
2403
2404 // This line implicitly drops the socket at `index` when it gets returned by
2405 // `swap_remove`. After this line, the socket at `index` is not the one from
2406 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
2407 // use `poll_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08002408 control_sockets.swap_remove(index);
2409 if let Some(socket) = control_sockets.get(index) {
2410 poll_ctx
Xiong Zhang44bb3dd2019-04-23 17:09:50 +08002411 .modify(
2412 socket,
2413 WatchingEvents::empty().set_read(),
2414 Token::VmControl { index },
2415 )
Zach Reiznera60744b2019-02-13 17:33:32 -08002416 .map_err(Error::PollContextAdd)?;
2417 }
2418 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002419 }
2420
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002421 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
2422 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07002423 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08002424 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002425 Ok(_) => {
2426 if let Err(e) = handle.join() {
2427 error!("failed to join vcpu thread: {:?}", e);
2428 }
2429 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08002430 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08002431 }
2432 }
2433
Daniel Verkamp94c35272019-09-12 13:31:30 -07002434 // Explicitly drop the VM structure here to allow the devices to clean up before the
2435 // control sockets are closed when this function exits.
2436 mem::drop(linux);
2437
Zach Reizner19ad1f32019-12-12 18:58:50 -08002438 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002439 .set_canon_mode()
2440 .expect("failed to restore canonical mode for terminal");
2441
2442 Ok(())
2443}