blob: ea78adf63024987a1c5726deef2bba07896394a4 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07005use std::cmp::{max, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Jakub Starond99cd0a2019-04-11 14:09:39 -070020use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070025use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080026use std::thread;
27use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090028use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080029
David Tolnay41a6f842019-03-01 16:18:44 -080030use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080031
Tomasz Jeznach42644642020-05-20 23:27:59 -070032use acpi_tables::sdt::SDT;
33
Michael Hoyle6b196952020-08-02 20:09:41 -070034use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reizner65b98f12019-11-22 17:34:58 -080035#[cfg(feature = "gpu")]
36use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070037use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080038#[cfg(feature = "audio")]
39use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080040use devices::{
Steven Richmanf32d0b42020-06-20 21:45:32 -070041 self, HostBackendDeviceProvider, KvmKernelIrqChip, PciDevice, VfioContainer, VfioDevice,
42 VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080043};
Steven Richmanf32d0b42020-06-20 21:45:32 -070044use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner2c770e62020-09-30 16:49:59 -070045use hypervisor::{Hypervisor, HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070046use minijail::{self, Minijail};
Zach Reiznera60744b2019-02-13 17:33:32 -080047use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080048use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070049use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080050use resources::{Alloc, MmioType, SystemAllocator};
Zach Reizner6a8fdd92019-01-16 14:38:41 -080051use sync::{Condvar, Mutex};
Jakub Starona3411ea2019-04-24 10:55:25 -070052
Michael Hoyle6b196952020-08-02 20:09:41 -070053use base::{
David Tolnay633426a2019-04-12 12:18:35 -070054 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060055 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
Michael Hoyle685316f2020-09-16 15:29:20 -070056 set_cpu_affinity, set_rt_prio_limit, set_rt_round_robin, signal, validate_raw_fd, warn, Event,
57 ExternalMapping, FlockOperation, Killable, MemoryMappingArena, PollContext, PollToken,
Michael Hoyle08d86a42020-08-19 14:45:21 -070058 Protection, ScopedEvent, SignalFd, Terminal, Timer, WatchingEvents, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080059};
Jakub Starone7c59052019-04-09 12:31:14 -070060use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070061 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Charles William Dick664cc3c2020-01-10 14:31:52 +090062 BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket,
Steven Richmanf32d0b42020-06-20 21:45:32 -070063 DiskControlResult, IrqSetup, UsbControlSocket, VmControlResponseSocket, VmIrqRequest,
64 VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket,
65 VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, VmMsyncRequest,
66 VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070067};
Dylan Reidec058d62020-07-20 20:21:11 -070068use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080069
Daniel Verkamp50740ce2020-02-28 12:36:56 -080070use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070071use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070072 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
73 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070074};
Sonny Raoed517d12018-02-13 22:09:43 -080075
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080076#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070077use {
78 aarch64::AArch64 as Arch,
79 devices::{IrqChip, IrqChipAArch64 as IrqChipArch},
80 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
81};
Zach Reizner55a9e502018-10-03 10:22:32 -070082#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070083use {
84 devices::{IrqChipX86_64, IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
85 hypervisor::{VcpuX86_64, VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
86 x86_64::X8664arch as Arch,
87};
Zach Reizner39aa26b2017-12-12 18:03:23 -080088
David Tolnay3df35522019-03-11 12:36:30 -070089#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070090#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080091pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070092 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070093 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070094 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -080095 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070096 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080097 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -070098 BlockDeviceNew(base::Error),
99 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -0800100 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700101 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700102 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700103 CloneVcpu(base::Error),
104 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000105 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800106 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700107 CreateConsole(arch::serial::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700108 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700109 CreateEvent(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700110 CreatePollContext(base::Error),
111 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800112 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800113 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700114 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800115 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800116 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700117 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800118 CreateVfioDevice(devices::vfio::VfioError),
Allen Webbf3024c82020-06-19 07:19:48 -0700119 DeviceJail(minijail::Error),
120 DevicePivotRoot(minijail::Error),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800121 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700122 DiskImageLock(base::Error),
123 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900124 FsDeviceNew(virtio::fs::Error),
125 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700126 GetSignalMask(signal::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800127 InputDeviceNew(virtio::InputError),
128 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700129 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700130 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700131 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800132 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800133 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800134 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700135 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800136 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700137 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800138 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800139 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800140 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800141 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900142 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800143 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700144 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700145 PmemDeviceNew(base::Error),
146 PollContextAdd(base::Error),
147 PollContextDelete(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900148 ReadMemAvailable(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700149 RegisterBalloon(arch::DeviceRegistrationError),
150 RegisterBlock(arch::DeviceRegistrationError),
151 RegisterGpu(arch::DeviceRegistrationError),
152 RegisterNet(arch::DeviceRegistrationError),
153 RegisterP9(arch::DeviceRegistrationError),
154 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700155 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700156 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700157 ReserveGpuMemory(base::MmapError),
158 ReserveMemory(base::Error),
159 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700160 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800161 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700162 RunnableVcpu(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700163 SettingGidMap(minijail::Error),
164 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700165 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700166 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700167 SignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800168 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700169 Timer(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700170 ValidateRawFd(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800171 VhostNetDeviceNew(virtio::vhost::Error),
172 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700173 VirtioPciDev(base::Error),
174 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800175}
176
David Tolnayc69f9752019-03-01 18:07:56 -0800177impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700178 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800179 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800180 use self::Error::*;
181
David Tolnay3df35522019-03-11 12:36:30 -0700182 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800183 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800184 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700185 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700186 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800187 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700188 AllocatePmemDeviceAddress(e) => {
189 write!(f, "failed to allocate memory for pmem device: {}", e)
190 }
David Tolnayc69f9752019-03-01 18:07:56 -0800191 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
192 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
193 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800194 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800195 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700196 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700197 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
198 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000199 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800200 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700201 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700202 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700203 CreateEvent(e) => write!(f, "failed to create event: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800204 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
205 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
206 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
207 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700208 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800209 CreateTpmStorage(p, e) => {
210 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
211 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800212 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700213 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800214 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800215 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
216 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800217 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800218 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800219 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900220 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
221 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700222 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700223 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
224 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800225 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
226 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800227 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800228 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800229 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800230 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700231 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800232 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800233 f,
234 "failed to open android fstab file {}: {}",
235 p.display(),
236 e
237 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700238 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700239 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
240 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800241 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800242 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900243 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800244 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700245 PmemDeviceImageTooBig => {
246 write!(f, "failed to create pmem device: pmem device image too big")
247 }
248 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800249 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
250 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900251 ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800252 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
253 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
254 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
255 RegisterNet(e) => write!(f, "error registering net device: {}", e),
256 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
257 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
258 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
259 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800260 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
261 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700262 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700263 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800264 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700265 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800266 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900267 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700268 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800269 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
270 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
271 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700272 Timer(e) => write!(f, "failed to read timer fd: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800273 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
274 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
275 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
276 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
277 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800278 }
279 }
280}
281
Allen Webbf3024c82020-06-19 07:19:48 -0700282impl From<minijail::Error> for Error {
283 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800284 Error::IoJail(err)
285 }
286}
287
David Tolnayc69f9752019-03-01 18:07:56 -0800288impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700289
Zach Reizner39aa26b2017-12-12 18:03:23 -0800290type Result<T> = std::result::Result<T, Error>;
291
Jakub Starond99cd0a2019-04-11 14:09:39 -0700292enum TaggedControlSocket {
293 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700294 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800295 VmIrq(VmIrqResponseSocket),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800296 VmMsync(VmMsyncResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700297}
298
299impl AsRef<UnixSeqpacket> for TaggedControlSocket {
300 fn as_ref(&self) -> &UnixSeqpacket {
301 use self::TaggedControlSocket::*;
302 match &self {
Chirantan Ekbote50582532020-01-16 16:49:14 +0900303 Vm(ref socket) => socket.as_ref(),
304 VmMemory(ref socket) => socket.as_ref(),
305 VmIrq(ref socket) => socket.as_ref(),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800306 VmMsync(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700307 }
308 }
309}
310
311impl AsRawFd for TaggedControlSocket {
312 fn as_raw_fd(&self) -> RawFd {
313 self.as_ref().as_raw_fd()
314 }
315}
316
Andrew Walbranf50bab62020-07-07 13:22:53 +0100317fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900318 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900319
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900320 // Safe because this will only modify `buf` and we check the return value.
321 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
322 if res == 0 {
323 // Safe because the kernel guarantees that the struct is fully initialized.
324 let limit = unsafe { buf.assume_init() };
325 Ok(limit.rlim_max)
326 } else {
327 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
328 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900329}
330
/// Sandbox settings consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When true, the jail is set up with no capabilities.
    limit_caps: bool,
    /// When true, seccomp filter failures are logged and the `.policy` file is always used
    /// instead of a pre-compiled `.bpf` program.
    log_failures: bool,
    /// Path of the seccomp policy; its extension is replaced with `bpf` or `policy` on use.
    seccomp_policy: &'a Path,
    /// UID map to install in the jail, if any.
    uid_map: Option<&'a str>,
    /// GID map to install in the jail, if any.
    gid_map: Option<&'a str>,
}
338
Zach Reizner44863792019-06-26 14:22:08 -0700339fn create_base_minijail(
340 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800341 r_limit: Option<u64>,
342 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700343) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800344 // All child jails run in a new user namespace without any users mapped,
345 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800346 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800347
348 if let Some(config) = config {
349 j.namespace_pids();
350 j.namespace_user();
351 j.namespace_user_disable_setgroups();
352 if config.limit_caps {
353 // Don't need any capabilities.
354 j.use_caps(0);
355 }
356 if let Some(uid_map) = config.uid_map {
357 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
358 }
359 if let Some(gid_map) = config.gid_map {
360 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
361 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900362 // Run in a new mount namespace.
363 j.namespace_vfs();
364
Matt Delcoc24ad782020-02-14 13:24:36 -0800365 // Run in an empty network namespace.
366 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900367
368 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800369 j.no_new_privs();
370
371 // By default we'll prioritize using the pre-compiled .bpf over the .policy
372 // file (the .bpf is expected to be compiled using "trap" as the failure
373 // behavior instead of the default "kill" behavior).
374 // Refer to the code comment for the "seccomp-log-failures"
375 // command-line parameter for an explanation about why the |log_failures|
376 // flag forces the use of .policy files (and the build-time alternative to
377 // this run-time flag).
378 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
379 if bpf_policy_file.exists() && !config.log_failures {
380 j.parse_seccomp_program(&bpf_policy_file)
381 .map_err(Error::DeviceJail)?;
382 } else {
383 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
384 // which will correctly kill the entire device process if a worker
385 // thread commits a seccomp violation.
386 j.set_seccomp_filter_tsync();
387 if config.log_failures {
388 j.log_seccomp_filter_failures();
389 }
390 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
391 .map_err(Error::DeviceJail)?;
392 }
393 j.use_seccomp_filter();
394 // Don't do init setup.
395 j.run_as_init();
396 }
397
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900398 // Only pivot_root if we are not re-using the current root directory.
399 if root != Path::new("/") {
400 // It's safe to call `namespace_vfs` multiple times.
401 j.namespace_vfs();
402 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
403 }
Matt Delco45caf912019-11-13 08:11:09 -0800404
Matt Delcoc24ad782020-02-14 13:24:36 -0800405 // Most devices don't need to open many fds.
406 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
407 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
408 .map_err(Error::SettingMaxOpenFiles)?;
409
Zach Reizner39aa26b2017-12-12 18:03:23 -0800410 Ok(j)
411}
412
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800413fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700414 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800415 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
416 // A directory for a jailed device's pivot root.
417 let root_path = Path::new(pivot_root);
418 if !root_path.exists() {
419 return Err(Error::PivotRootDoesntExist(pivot_root));
420 }
421 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800422 let config = SandboxConfig {
423 limit_caps: true,
424 log_failures: cfg.seccomp_log_failures,
425 seccomp_policy: &policy_path,
426 uid_map: None,
427 gid_map: None,
428 };
429 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800430 } else {
431 Ok(None)
432 }
433}
434
David Tolnayfd0971d2019-03-04 17:15:57 -0800435type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800436
437fn create_block_device(
438 cfg: &Config,
439 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700440 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800441) -> DeviceResult {
442 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
443 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
444 // Safe because we will validate |raw_fd|.
445 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
446 } else {
447 OpenOptions::new()
448 .read(true)
449 .write(!disk.read_only)
450 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800451 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800452 };
453 // Lock the disk image to prevent other crosvm instances from using it.
454 let lock_op = if disk.read_only {
455 FlockOperation::LockShared
456 } else {
457 FlockOperation::LockExclusive
458 };
459 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
460
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700461 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800462 let dev = virtio::Block::new(
463 disk_file,
464 disk.read_only,
465 disk.sparse,
Daniel Verkamp27672232019-12-06 17:26:55 +1100466 disk.block_size,
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800467 Some(disk_device_socket),
468 )
469 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800470
471 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700472 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800473 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800474 })
475}
476
477fn create_rng_device(cfg: &Config) -> DeviceResult {
478 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
479
480 Ok(VirtioDeviceStub {
481 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800482 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800483 })
484}
485
/// Creates the virtio tpm device and, when sandboxing is enabled, prepares its storage
/// directory inside the jail.
///
/// With a jail, storage lives in `/run/vm/tpm.<pid>`, owned by the ids returned from
/// `add_crosvm_user_to_jail` and bind-mounted into the jail. Without a jail,
/// `/tmp/tpm-simulator` is used.
///
/// # Errors
///
/// Returns `Error::CreateTpmStorage` or `Error::ChownTpmStorage` if the storage directory
/// cannot be prepared, `Error::IoJail` for mount failures, and any error from `simple_jail`
/// or `add_crosvm_user_to_jail`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use base::chown;
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            let mount_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                mount_flags,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            // Per-process storage directory, keyed by this process's pid.
            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
534
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800535fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800536 let socket = single_touch_spec
537 .get_path()
538 .into_unix_stream()
539 .map_err(|e| {
540 error!("failed configuring virtio single touch: {:?}", e);
541 e
542 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800543
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800544 let (width, height) = single_touch_spec.get_size();
545 let dev = virtio::new_single_touch(socket, width, height).map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800546 Ok(VirtioDeviceStub {
547 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800548 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800549 })
550}
551
552fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800553 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800554 error!("failed configuring virtio trackpad: {}", e);
555 e
556 })?;
557
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800558 let (width, height) = trackpad_spec.get_size();
559 let dev = virtio::new_trackpad(socket, width, height).map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800560
561 Ok(VirtioDeviceStub {
562 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800563 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800564 })
565}
566
Zach Reizner65b98f12019-11-22 17:34:58 -0800567fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
568 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800569 error!("failed configuring virtio mouse: {}", e);
570 e
571 })?;
572
573 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
574
575 Ok(VirtioDeviceStub {
576 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800577 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800578 })
579}
580
Zach Reizner65b98f12019-11-22 17:34:58 -0800581fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
582 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800583 error!("failed configuring virtio keyboard: {}", e);
584 e
585 })?;
586
587 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
588
589 Ok(VirtioDeviceStub {
590 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800591 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800592 })
593}
594
595fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
596 let dev_file = OpenOptions::new()
597 .read(true)
598 .write(true)
599 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800600 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800601
602 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
603
604 Ok(VirtioDeviceStub {
605 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800606 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800607 })
608}
609
Jakub Staron1f828d72019-04-11 12:49:29 -0700610fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800611 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
612
613 Ok(VirtioDeviceStub {
614 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800615 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800616 })
617}
618
619fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
620 // Safe because we ensure that we get a unique handle to the fd.
621 let tap = unsafe {
622 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
623 .map_err(Error::CreateTapDevice)?
624 };
625
Xiong Zhang773c7072020-03-20 10:39:55 +0800626 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
627 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700628 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800629 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
630 vq_pairs = 1;
631 }
632 let dev = virtio::Net::from(tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800633
634 Ok(VirtioDeviceStub {
635 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800636 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800637 })
638}
639
640fn create_net_device(
641 cfg: &Config,
642 host_ip: Ipv4Addr,
643 netmask: Ipv4Addr,
644 mac_address: MacAddress,
645 mem: &GuestMemory,
646) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800647 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
648 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700649 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800650 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
651 vq_pairs = 1;
652 }
653
David Tolnay2b089fc2019-03-04 15:33:22 -0800654 let dev = if cfg.vhost_net {
655 let dev =
656 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
657 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800658 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800659 } else {
Xiong Zhang773c7072020-03-20 10:39:55 +0800660 let dev = virtio::Net::<Tap>::new(host_ip, netmask, mac_address, vq_pairs)
661 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800662 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800663 };
664
665 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800666 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800667 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800668 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800669 };
670
671 Ok(VirtioDeviceStub {
672 dev,
673 jail: simple_jail(&cfg, policy)?,
674 })
675}
676
/// Builds the virtio GPU device and, when `simple_jail` yields one, populates
/// its jail with everything the GPU process needs to reach.
///
/// Display backends are passed as: Wayland first (only when
/// `wayland_socket_path` is given), then X, then the stub backend. With
/// `cfg.sandbox` set, the Wayland backend is pointed at the in-jail path
/// `/wayland-0` instead of the host socket path.
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    let mut display_backends = Vec::new();
    if let Some(socket_path) = wayland_socket_path {
        let wayland_path = if cfg.sandbox {
            jailed_wayland_path
        } else {
            socket_path.as_path()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(
            wayland_path.to_owned(),
        )));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEvent)?,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
    );

    let mut jail = simple_jail(cfg, "gpu_device")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
            let sysfs_path = Path::new(sysfs_dir);
            jail.mount_bind(sysfs_path, sysfs_path, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        let gpu_params = cfg.gpu_parameters.as_ref();
        if let Some(cache_dir) = gpu_params.and_then(|params| params.cache_path.as_ref()) {
            let arm_sandboxed =
                cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox;
            if arm_sandboxed {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) = gpu_params.and_then(|params| params.cache_size.as_ref())
                {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_path = Path::new(cache_dir);
                jail.mount_bind(shadercache_path, shadercache_path, true)?;
            }
        }

        // If the ARM specific devices exist on the host, bind mount them in.
        for arm_dev in &["/dev/mali0", "/dev/pvr_sync"] {
            let arm_dev_path = Path::new(arm_dev);
            if arm_dev_path.exists() {
                jail.mount_bind(arm_dev_path, arm_dev_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        for lib_dir in &["/usr/lib", "/usr/lib64", "/lib", "/lib64"] {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                jail.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(path) = wayland_socket_path {
            jail.mount_bind(path, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        jail.mount(proc_path, proc_path, "proc", proc_flags)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
811
812fn create_wayland_device(
813 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700814 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800815 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
816) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900817 let wayland_socket_dirs = cfg
818 .wayland_socket_paths
819 .iter()
820 .map(|(_name, path)| path.parent())
821 .collect::<Option<Vec<_>>>()
822 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800823
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900824 let dev = virtio::Wl::new(cfg.wayland_socket_paths.clone(), socket, resource_bridge)
825 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800826
Matt Delco45caf912019-11-13 08:11:09 -0800827 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800828 Some(mut jail) => {
829 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
830 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
831 jail.mount_with_data(
832 Path::new("none"),
833 Path::new("/"),
834 "tmpfs",
835 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
836 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800837 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800838
839 // Bind mount the wayland socket's directory into jail's root. This is necessary since
840 // each new wayland context must open() the socket. If the wayland socket is ever
841 // destroyed and remade in the same host directory, new connections will be possible
842 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900843 for dir in &wayland_socket_dirs {
844 jail.mount_bind(dir, dir, true)?;
845 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800846 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
847
848 Some(jail)
849 }
850 None => None,
851 };
852
853 Ok(VirtioDeviceStub {
854 dev: Box::new(dev),
855 jail,
856 })
857}
858
/// Builds a virtio video device of kind `typ` (decoder or encoder) attached to
/// `resource_bridge`.
///
/// When `simple_jail` yields a jail for the "video_device" policy, the jail
/// user is added under the name matching `typ`, a tmpfs root is mounted, and
/// the DRM render node, `/dev/urandom` and the D-Bus system bus socket are
/// bind mounted in.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: virtio::resource_bridge::ResourceRequestSocket,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = jail.as_mut() {
        let jail_user = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, jail_user)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let dev_dri_path = Path::new("/dev/dri/renderD128");
        jail.mount_bind(dev_dri_path, dev_dri_path, false)?;

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let dev_urandom_path = Path::new("/dev/urandom");
        jail.mount_bind(dev_urandom_path, dev_urandom_path, false)?;
        let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
    }

    let video = devices::virtio::VideoDevice::new(typ, Some(resource_bridge));

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
908
/// Creates a video device of kind `typ` and appends it to `devs`.
///
/// A fresh resource-bridge socket pair is created: one end is pushed onto
/// `resource_bridges`, the other is handed to the new video device. The
/// bridge end is pushed before the device is created, so it stays in
/// `resource_bridges` even if device creation fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    resource_bridges: &mut Vec<virtio::resource_bridge::ResourceResponseSocket>,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let (device_end, bridge_end) =
        virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
    resource_bridges.push(bridge_end);

    let video_stub = create_video_device(cfg, typ, device_end)?;
    devs.push(video_stub);
    Ok(())
}
922
David Tolnay2b089fc2019-03-04 15:33:22 -0800923fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
924 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
925
926 Ok(VirtioDeviceStub {
927 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800928 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800929 })
930}
931
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900932fn create_fs_device(
933 cfg: &Config,
934 uid_map: &str,
935 gid_map: &str,
936 src: &Path,
937 tag: &str,
938 fs_cfg: virtio::fs::passthrough::Config,
939) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900940 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800941 let j = if cfg.sandbox {
942 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
943 let config = SandboxConfig {
944 limit_caps: false,
945 uid_map: Some(uid_map),
946 gid_map: Some(gid_map),
947 log_failures: cfg.seccomp_log_failures,
948 seccomp_policy: &seccomp_policy,
949 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +0900950 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
951 // We want bind mounts from the parent namespaces to propagate into the fs device's
952 // namespace.
953 jail.set_remount_mode(libc::MS_SLAVE);
954
955 jail
Matt Delcoc24ad782020-02-14 13:24:36 -0800956 } else {
957 create_base_minijail(src, Some(max_open_files), None)?
958 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900959
960 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
961 // when num_queues > 1.
962 let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
963
964 Ok(VirtioDeviceStub {
965 dev: Box::new(dev),
966 jail: Some(j),
967 })
968}
969
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900970fn create_9p_device(
971 cfg: &Config,
972 uid_map: &str,
973 gid_map: &str,
974 src: &Path,
975 tag: &str,
976) -> DeviceResult {
977 let max_open_files = get_max_open_files()?;
978 let (jail, root) = if cfg.sandbox {
979 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
980 let config = SandboxConfig {
981 limit_caps: false,
982 uid_map: Some(uid_map),
983 gid_map: Some(gid_map),
984 log_failures: cfg.seccomp_log_failures,
985 seccomp_policy: &seccomp_policy,
986 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800987
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900988 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
989 // We want bind mounts from the parent namespaces to propagate into the 9p server's
990 // namespace.
991 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +0900992
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +0900993 // The shared directory becomes the root of the device's file system.
994 let root = Path::new("/");
995 (Some(jail), root)
996 } else {
997 // There's no mount namespace so we tell the server to treat the source directory as the
998 // root.
999 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001000 };
1001
1002 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
1003
1004 Ok(VirtioDeviceStub {
1005 dev: Box::new(dev),
1006 jail,
1007 })
1008}
1009
Jakub Starona3411ea2019-04-24 10:55:25 -07001010fn create_pmem_device(
1011 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001012 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001013 resources: &mut SystemAllocator,
1014 disk: &DiskOption,
1015 index: usize,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001016 pmem_device_socket: VmMsyncRequestSocket,
Jakub Starona3411ea2019-04-24 10:55:25 -07001017) -> DeviceResult {
1018 let fd = OpenOptions::new()
1019 .read(true)
1020 .write(!disk.read_only)
1021 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001022 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001023
Iliyan Malcheved149862020-04-17 23:57:47 +00001024 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001025 let metadata =
1026 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001027 let disk_len = metadata.len();
1028 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1029 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1030 // we just align the size of the file to 2 MiB then access beyond the last page of the
1031 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1032 // padding up to 2 MiB.
1033 let alignment = 2 * 1024 * 1024;
1034 let align_adjust = if disk_len % alignment != 0 {
1035 alignment - (disk_len % alignment)
1036 } else {
1037 0
1038 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001039 disk_len
1040 .checked_add(align_adjust)
1041 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001042 };
1043
1044 let protection = {
1045 if disk.read_only {
1046 Protection::read()
1047 } else {
1048 Protection::read_write()
1049 }
1050 };
1051
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001052 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001053 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001054 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001055
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001056 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1057 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001058 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001059 .map_err(Error::ReservePmemMemory)?;
1060 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001061 };
1062
1063 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001064 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001065 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001066 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001067 Alloc::PmemDevice(index),
1068 format!("pmem_disk_image_{}", index),
1069 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1070 128 * 1024 * 1024, /* 128 MiB */
1071 )
1072 .map_err(Error::AllocatePmemDeviceAddress)?;
1073
Daniel Verkampe1980a92020-02-07 11:00:55 -08001074 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001075 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001076 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001077 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001078 /* read_only = */ disk.read_only,
1079 /* log_dirty_pages = */ false,
1080 )
1081 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001082
Daniel Verkampe1980a92020-02-07 11:00:55 -08001083 let dev = virtio::Pmem::new(
1084 fd,
1085 GuestAddress(mapping_address),
1086 slot,
1087 arena_size,
1088 Some(pmem_device_socket),
1089 )
1090 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001091
1092 Ok(VirtioDeviceStub {
1093 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001094 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001095 })
1096}
1097
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001098fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
1099 let mut keep_fds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001100 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001101 let dev = param
1102 .create_serial_device::<Console>(&evt, &mut keep_fds)
1103 .map_err(Error::CreateConsole)?;
1104
Nicholas Verne71e73d82020-07-08 17:19:55 +10001105 let jail = match simple_jail(&cfg, "serial")? {
1106 Some(mut jail) => {
1107 // Create a tmpfs in the device's root directory so that we can bind mount the
1108 // log socket directory into it.
1109 // The size=67108864 is size=64*1024*1024 or size=64MB.
1110 jail.mount_with_data(
1111 Path::new("none"),
1112 Path::new("/"),
1113 "tmpfs",
1114 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1115 "size=67108864",
1116 )?;
1117 add_crosvm_user_to_jail(&mut jail, "serial")?;
1118 let res = param.add_bind_mounts(&mut jail);
1119 if res.is_err() {
1120 error!("failed to add bind mounts for console device");
1121 }
1122 Some(jail)
1123 }
1124 None => None,
1125 };
1126
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001127 Ok(VirtioDeviceStub {
1128 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001129 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001130 })
1131}
1132
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001133// gpu_device_socket is not used when GPU support is disabled.
1134#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001135fn create_virtio_devices(
1136 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -07001137 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001138 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001139 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001140 _exit_evt: &Event,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001141 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001142 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001143 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001144 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001145 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001146 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001147) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001148 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001149
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001150 for (_, param) in cfg
1151 .serial_parameters
1152 .iter()
1153 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1154 {
1155 let dev = create_console_device(cfg, param)?;
1156 devs.push(dev);
1157 }
1158
Zach Reizner8fb52112017-12-13 16:04:39 -08001159 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001160 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -08001161 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001162 }
1163
Jakub Starona3411ea2019-04-24 10:55:25 -07001164 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Daniel Verkampe1980a92020-02-07 11:00:55 -08001165 let pmem_device_socket = pmem_device_sockets.remove(0);
1166 devs.push(create_pmem_device(
1167 cfg,
1168 vm,
1169 resources,
1170 pmem_disk,
1171 index,
1172 pmem_device_socket,
1173 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001174 }
1175
David Tolnay2b089fc2019-03-04 15:33:22 -08001176 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001177
David Tolnayde6b29a2018-12-20 11:49:46 -08001178 #[cfg(feature = "tpm")]
1179 {
David Tolnay43f8e212019-02-13 17:28:16 -08001180 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001181 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001182 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001183 }
1184
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001185 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
1186 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
1187 }
1188
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001189 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -08001190 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001191 }
1192
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001193 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -08001194 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001195 }
1196
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001197 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -08001198 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001199 }
1200
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001201 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -08001202 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001203 }
1204
David Tolnay2b089fc2019-03-04 15:33:22 -08001205 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001206
Zach Reizner39aa26b2017-12-12 18:03:23 -08001207 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001208 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001209 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001210 }
1211
David Tolnay2b089fc2019-03-04 15:33:22 -08001212 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1213 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1214 {
1215 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001216 }
1217
David Tolnayfa701712019-02-13 16:42:54 -08001218 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001219 let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();
1220
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001221 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001222 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
1223 let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;
1224
1225 #[cfg(feature = "gpu")]
1226 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001227 if cfg.gpu_parameters.is_some() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001228 let (wl_socket, gpu_socket) =
1229 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
1230 resource_bridges.push(gpu_socket);
1231 wl_resource_bridge = Some(wl_socket);
1232 }
1233 }
1234
1235 devs.push(create_wayland_device(
1236 cfg,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001237 wayland_device_socket,
1238 wl_resource_bridge,
1239 )?);
1240 }
David Tolnayfa701712019-02-13 16:42:54 -08001241
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001242 #[cfg(feature = "video-decoder")]
1243 {
1244 if cfg.video_dec {
1245 register_video_device(
1246 &mut devs,
1247 &mut resource_bridges,
1248 cfg,
1249 devices::virtio::VideoDeviceType::Decoder,
1250 )?;
1251 }
1252 }
1253
1254 #[cfg(feature = "video-encoder")]
1255 {
1256 if cfg.video_enc {
1257 register_video_device(
1258 &mut devs,
1259 &mut resource_bridges,
1260 cfg,
1261 devices::virtio::VideoDeviceType::Encoder,
1262 )?;
1263 }
1264 }
1265
Zach Reizner3a8100a2017-09-13 19:15:43 -07001266 #[cfg(feature = "gpu")]
1267 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001268 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Zach Reizner65b98f12019-11-22 17:34:58 -08001269 let mut event_devices = Vec::new();
1270 if cfg.display_window_mouse {
1271 let (event_device_socket, virtio_dev_socket) =
1272 UnixStream::pair().map_err(Error::CreateSocket)?;
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001273 let (single_touch_width, single_touch_height) = cfg
1274 .virtio_single_touch
1275 .as_ref()
1276 .map(|single_touch_spec| single_touch_spec.get_size())
Noah Golddc7f52b2020-02-01 13:01:58 -08001277 .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001278 let dev = virtio::new_single_touch(
1279 virtio_dev_socket,
1280 single_touch_width,
1281 single_touch_height,
1282 )
1283 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001284 devs.push(VirtioDeviceStub {
1285 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001286 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001287 });
1288 event_devices.push(EventDevice::touchscreen(event_device_socket));
1289 }
1290 if cfg.display_window_keyboard {
1291 let (event_device_socket, virtio_dev_socket) =
1292 UnixStream::pair().map_err(Error::CreateSocket)?;
1293 let dev = virtio::new_keyboard(virtio_dev_socket).map_err(Error::InputDeviceNew)?;
1294 devs.push(VirtioDeviceStub {
1295 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001296 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001297 });
1298 event_devices.push(EventDevice::keyboard(event_device_socket));
1299 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001300 devs.push(create_gpu_device(
1301 cfg,
1302 _exit_evt,
1303 gpu_device_socket,
1304 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001305 // Use the unnamed socket for GPU display screens.
1306 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001307 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001308 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001309 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001310 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001311 }
1312 }
1313
Zach Reizneraa575662018-08-15 10:46:32 -07001314 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001315 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001316 }
1317
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001318 for shared_dir in &cfg.shared_dirs {
1319 let SharedDir {
1320 src,
1321 tag,
1322 kind,
1323 uid_map,
1324 gid_map,
1325 cfg: fs_cfg,
1326 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001327
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001328 let dev = match kind {
1329 SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001330 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag)?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001331 };
1332 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001333 }
1334
1335 Ok(devs)
1336}
1337
1338fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001339 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001340 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001341 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001342 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001343 exit_evt: &Event,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001344 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001345 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001346 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001347 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001348 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001349 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001350 usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001351 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001352) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001353 let stubs = create_virtio_devices(
1354 &cfg,
1355 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001356 vm,
1357 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001358 exit_evt,
1359 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001360 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001361 balloon_device_socket,
1362 disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001363 pmem_device_sockets,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001364 map_request,
David Tolnay2b089fc2019-03-04 15:33:22 -08001365 )?;
1366
1367 let mut pci_devices = Vec::new();
1368
1369 for stub in stubs {
Daniel Verkampbb712d62019-11-19 09:47:33 -08001370 let (msi_host_socket, msi_device_socket) =
1371 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1372 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1373 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
1374 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001375 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001376 pci_devices.push((dev, stub.jail));
1377 }
1378
Andrew Scull1590e6f2020-03-18 18:00:47 +00001379 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001380 for ac97_param in &cfg.ac97_parameters {
1381 let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
paulhsiace17e6e2020-08-28 18:37:45 +08001382 let jail = simple_jail(&cfg, dev.minijail_policy())?;
1383 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001384 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001385
Jingkui Wang100e6e42019-03-08 20:41:57 -08001386 // Create xhci controller.
1387 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001388 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001389
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001390 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001391 let vfio_container = Arc::new(Mutex::new(
1392 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1393 ));
1394
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001395 for vfio_path in &cfg.vfio {
Daniel Verkamp10154a92020-09-28 17:44:40 -07001396 // create MSI, MSI-X, and Mem request sockets for each vfio device
1397 let (vfio_host_socket_msi, vfio_device_socket_msi) =
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001398 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
Daniel Verkamp10154a92020-09-28 17:44:40 -07001399 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msi));
1400
1401 let (vfio_host_socket_msix, vfio_device_socket_msix) =
1402 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1403 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msix));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001404
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001405 let (vfio_host_socket_mem, vfio_device_socket_mem) =
1406 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>()
1407 .map_err(Error::CreateSocket)?;
1408 control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001409
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001410 let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
1411 .map_err(Error::CreateVfioDevice)?;
1412 let vfiopcidevice = Box::new(VfioPciDevice::new(
1413 vfiodevice,
Daniel Verkamp10154a92020-09-28 17:44:40 -07001414 vfio_device_socket_msi,
1415 vfio_device_socket_msix,
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001416 vfio_device_socket_mem,
1417 ));
1418 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1419 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001420 }
1421
David Tolnay2b089fc2019-03-04 15:33:22 -08001422 Ok(pci_devices)
1423}
1424
1425#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001426#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001427struct Ids {
1428 uid: uid_t,
1429 gid: gid_t,
1430}
1431
David Tolnay48c48292019-03-01 16:54:25 -08001432// Set the uid/gid for the jailed process and give a basic id map. This is
1433// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001434fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001435 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1436
1437 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1438 Ok(u) => u,
1439 Err(e) => {
1440 warn!("falling back to current user id for {}: {}", feature, e);
1441 geteuid()
1442 }
1443 };
1444
1445 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1446 Ok(u) => u,
1447 Err(e) => {
1448 warn!("falling back to current group id for {}: {}", feature, e);
1449 getegid()
1450 }
1451 };
1452
1453 jail.change_uid(crosvm_uid);
1454 jail.change_gid(crosvm_gid);
1455 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1456 .map_err(Error::SettingUidMap)?;
1457 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1458 .map_err(Error::SettingGidMap)?;
1459
David Tolnay41a6f842019-03-01 16:18:44 -08001460 Ok(Ids {
1461 uid: crosvm_uid,
1462 gid: crosvm_gid,
1463 })
David Tolnay48c48292019-03-01 16:54:25 -08001464}
1465
David Tolnayfd0971d2019-03-04 17:15:57 -08001466fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001467 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001468 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001469 }
1470 let raw_fd = path
1471 .file_name()
1472 .and_then(|fd_osstr| fd_osstr.to_str())
1473 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1474 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001475 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001476}
1477
Zach Reizner65b98f12019-11-22 17:34:58 -08001478trait IntoUnixStream {
1479 fn into_unix_stream(self) -> Result<UnixStream>;
1480}
1481
1482impl<'a> IntoUnixStream for &'a Path {
1483 fn into_unix_stream(self) -> Result<UnixStream> {
1484 if self.parent() == Some(Path::new("/proc/self/fd")) {
1485 // Safe because we will validate |raw_fd|.
1486 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(self)?)) }
1487 } else {
1488 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1489 }
1490 }
1491}
1492impl<'a> IntoUnixStream for &'a PathBuf {
1493 fn into_unix_stream(self) -> Result<UnixStream> {
1494 self.as_path().into_unix_stream()
1495 }
1496}
1497
1498impl IntoUnixStream for UnixStream {
1499 fn into_unix_stream(self) -> Result<UnixStream> {
1500 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001501 }
1502}
1503
Steven Richmanf32d0b42020-06-20 21:45:32 -07001504fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1505 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001506 unsafe {
1507 extern "C" fn handle_signal() {}
1508 // Our signal handler does nothing and is trivially async signal safe.
1509 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1510 .map_err(Error::RegisterSignalHandler)?;
1511 }
1512 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1513 } else {
1514 unsafe {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001515 extern "C" fn handle_signal<T: Vcpu>() {
1516 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001517 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001518 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001519 .map_err(Error::RegisterSignalHandler)?;
1520 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001521 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001522 Ok(())
1523}
1524
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001525#[derive(Default)]
1526struct VcpuRunMode {
1527 mtx: Mutex<VmRunMode>,
1528 cvar: Condvar,
1529}
1530
1531impl VcpuRunMode {
1532 fn set_and_notify(&self, new_mode: VmRunMode) {
1533 *self.mtx.lock() = new_mode;
1534 self.cvar.notify_all();
1535 }
1536}
1537
Steven Richmanf32d0b42020-06-20 21:45:32 -07001538// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001539fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001540 cpu_id: usize,
1541 vcpu: Option<V>,
1542 vm: impl VmArch<Vcpu = V>,
1543 irq_chip: &mut impl IrqChipArch<V>,
1544 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001545 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001546 vcpu_affinity: Vec<usize>,
1547 has_bios: bool,
1548 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001549) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001550where
Zach Reizner2c770e62020-09-30 16:49:59 -07001551 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001552{
1553 let mut vcpu = if let Some(v) = vcpu {
1554 v
1555 } else {
1556 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from the
1557 // vcpu thread.
1558 vm.create_vcpu(cpu_id).map_err(Error::CreateVcpu)?
1559 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001560
Steven Richmanf32d0b42020-06-20 21:45:32 -07001561 irq_chip
1562 .add_vcpu(cpu_id, vcpu.try_clone().map_err(Error::CloneVcpu)?)
1563 .map_err(Error::AddIrqChipVcpu)?;
1564
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001565 if !vcpu_affinity.is_empty() {
1566 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1567 error!("Failed to set CPU affinity: {}", e);
1568 }
1569 }
1570
Steven Richmanf32d0b42020-06-20 21:45:32 -07001571 Arch::configure_vcpu(
1572 vm.get_memory(),
1573 vm.get_hypervisor(),
1574 irq_chip,
1575 &mut vcpu,
1576 cpu_id,
1577 vcpu_count,
1578 has_bios,
1579 )
1580 .map_err(Error::ConfigureVcpu)?;
1581
Steven Richmanf32d0b42020-06-20 21:45:32 -07001582 #[cfg(feature = "chromeos")]
1583 if let Err(e) = base::sched::enable_core_scheduling() {
1584 error!("Failed to enable core scheduling: {}", e);
1585 }
1586
Kansho Nishidaab205af2020-08-13 18:17:50 +09001587 if run_rt {
1588 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1589 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1590 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1591 {
1592 warn!("Failed to set vcpu to real time: {}", e);
1593 }
1594 }
1595
Steven Richmanf32d0b42020-06-20 21:45:32 -07001596 if use_hypervisor_signals {
1597 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1598 v.retain(|&x| x != SIGRTMIN() + 0);
1599 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1600 }
1601
Zach Reizner2c770e62020-09-30 16:49:59 -07001602 let vcpu_run_handle = vcpu
1603 .take_run_handle(Some(SIGRTMIN() + 0))
1604 .map_err(Error::RunnableVcpu)?;
1605
1606 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001607}
1608
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001609#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -07001610fn inject_interrupt<T: VcpuX86_64>(
1611 irq_chip: &mut impl IrqChipX86_64<T>,
1612 vcpu: &impl VcpuX86_64,
1613 vcpu_id: usize,
1614) {
1615 if !irq_chip.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
1616 return;
1617 }
1618
1619 let vector = irq_chip
1620 .get_external_interrupt(vcpu_id)
1621 .unwrap_or_else(|e| {
1622 error!("get_external_interrupt failed on vcpu {}: {}", vcpu_id, e);
1623 None
1624 });
1625 if let Some(vector) = vector {
1626 if let Err(e) = vcpu.interrupt(vector as u32) {
1627 error!(
1628 "Failed to inject interrupt {} to vcpu {}: {}",
1629 vector, vcpu_id, e
1630 );
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001631 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001632 }
1633
1634 // The second interrupt request should be handled immediately, so ask vCPU to exit as soon as
1635 // possible.
1636 if irq_chip.interrupt_requested(vcpu_id) {
1637 vcpu.request_interrupt_window();
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001638 }
1639}
1640
// ARM counterpart of the x86 `inject_interrupt`: intentionally a no-op.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
fn inject_interrupt<T>(_irq_chip: &mut impl IrqChip<T>, _vcpu: &impl Vcpu, _vcpu_id: usize)
where
    T: Vcpu,
{
}
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001643
Zach Reizner2c770e62020-09-30 16:49:59 -07001644fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001645 cpu_id: usize,
1646 vcpu: Option<V>,
1647 vm: impl VmArch<Vcpu = V> + 'static,
1648 mut irq_chip: impl IrqChipArch<V> + 'static,
1649 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001650 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001651 vcpu_affinity: Vec<usize>,
Zach Reizner55a9e502018-10-03 10:22:32 -07001652 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001653 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001654 io_bus: devices::Bus,
1655 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07001656 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001657 requires_pvclock_ctrl: bool,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001658 run_mode_arc: Arc<VcpuRunMode>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001659 use_hypervisor_signals: bool,
1660) -> Result<JoinHandle<()>>
1661where
Zach Reizner2c770e62020-09-30 16:49:59 -07001662 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001663{
Zach Reizner8fb52112017-12-13 16:04:39 -08001664 thread::Builder::new()
1665 .name(format!("crosvm_vcpu{}", cpu_id))
1666 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08001667 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
1668 // implementation accomplishes that.
1669 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
1670
Zach Reizner2c770e62020-09-30 16:49:59 -07001671 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001672 cpu_id,
1673 vcpu,
1674 vm,
1675 &mut irq_chip,
1676 vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001677 run_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001678 vcpu_affinity,
1679 has_bios,
1680 use_hypervisor_signals,
1681 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001682
Zach Reizner8fb52112017-12-13 16:04:39 -08001683 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001684
Zach Reizner2c770e62020-09-30 16:49:59 -07001685 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001686 Ok(v) => v,
1687 Err(e) => {
1688 error!("failed to start vcpu {}: {}", cpu_id, e);
1689 return;
1690 }
1691 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001692
Steven Richmanf32d0b42020-06-20 21:45:32 -07001693 loop {
1694 let mut interrupted_by_signal = false;
Zach Reizner2c770e62020-09-30 16:49:59 -07001695 match vcpu.run(&vcpu_run_handle) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001696 Ok(VcpuExit::IoIn { port, mut size }) => {
1697 let mut data = [0; 8];
1698 if size > data.len() {
1699 error!("unsupported IoIn size of {} bytes", size);
1700 size = data.len();
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001701 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001702 io_bus.read(port as u64, &mut data[..size]);
1703 if let Err(e) = vcpu.set_data(&data[..size]) {
1704 error!("failed to set return data for IoIn: {}", e);
1705 }
1706 }
1707 Ok(VcpuExit::IoOut {
1708 port,
1709 mut size,
1710 data,
1711 }) => {
1712 if size > data.len() {
1713 error!("unsupported IoOut size of {} bytes", size);
1714 size = data.len();
1715 }
1716 io_bus.write(port as u64, &data[..size]);
1717 }
1718 Ok(VcpuExit::MmioRead { address, size }) => {
1719 let mut data = [0; 8];
1720 mmio_bus.read(address, &mut data[..size]);
1721 // Setting data for mmio can not fail.
1722 let _ = vcpu.set_data(&data[..size]);
1723 }
1724 Ok(VcpuExit::MmioWrite {
1725 address,
1726 size,
1727 data,
1728 }) => {
1729 mmio_bus.write(address, &data[..size]);
1730 }
1731 Ok(VcpuExit::IoapicEoi { vector }) => {
1732 if let Err(e) = irq_chip.broadcast_eoi(vector) {
1733 error!(
1734 "failed to broadcast eoi {} on vcpu {}: {}",
1735 vector, cpu_id, e
1736 );
1737 }
1738 }
1739 Ok(VcpuExit::Hlt) => break,
1740 Ok(VcpuExit::Shutdown) => break,
1741 Ok(VcpuExit::FailEntry {
1742 hardware_entry_failure_reason,
1743 }) => {
1744 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
1745 break;
1746 }
1747 Ok(VcpuExit::SystemEvent(_, _)) => break,
1748 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1749 Err(e) => match e.errno() {
1750 libc::EINTR => interrupted_by_signal = true,
1751 libc::EAGAIN => {}
1752 _ => {
1753 error!("vcpu hit unknown error: {}", e);
1754 break;
1755 }
1756 },
1757 }
1758
1759 if interrupted_by_signal {
1760 if use_hypervisor_signals {
1761 // Try to clear the signal that we use to kick VCPU if it is pending before
1762 // attempting to handle pause requests.
1763 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
1764 error!("failed to clear pending signal: {}", e);
1765 break;
1766 }
1767 } else {
1768 vcpu.set_immediate_exit(false);
1769 }
1770 let mut run_mode_lock = run_mode_arc.mtx.lock();
1771 loop {
1772 match *run_mode_lock {
1773 VmRunMode::Running => break,
1774 VmRunMode::Suspending => {
1775 // On KVM implementations that use a paravirtualized clock (e.g.
1776 // x86), a flag must be set to indicate to the guest kernel that a
1777 // VCPU was suspended. The guest kernel will use this flag to
1778 // prevent the soft lockup detection from triggering when this VCPU
1779 // resumes, which could happen days later in realtime.
1780 if requires_pvclock_ctrl {
1781 if let Err(e) = vcpu.pvclock_ctrl() {
1782 error!(
1783 "failed to tell hypervisor vcpu {} is suspending: {}",
1784 cpu_id, e
1785 );
Zach Reizner795355a2019-01-16 17:37:57 -08001786 }
1787 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001788 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001789 VmRunMode::Exiting => return,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001790 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001791 // Give ownership of our exclusive lock to the condition variable that will
1792 // block. When the condition variable is notified, `wait` will unblock and
1793 // return a new exclusive lock.
1794 run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001795 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001796 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001797
Zach Reizner2c770e62020-09-30 16:49:59 -07001798 inject_interrupt(&mut irq_chip, &vcpu, cpu_id);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001799 }
David Tolnay2bac1e72018-12-12 14:33:42 -08001800 })
1801 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001802}
1803
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of i64s.
// Returns an error if the file cannot be opened or read, is not valid UTF-8, or if any of the
// fields fail to parse.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    // Read the file to the end instead of issuing a single read into a small fixed buffer: a
    // truncated read could cut a field in half, which would then parse as a different number.
    let mut content = String::new();
    File::open(path)?.read_to_string(&mut content)?;

    // `split_whitespace` already ignores leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1823
1824// Reads the contents of a file and converts them into a u64, and if there
1825// are multiple fields it only returns the first one.
Charles William Dick0bf8a552019-10-29 15:36:01 +09001826fn file_to_i64<P: AsRef<Path>>(path: P) -> io::Result<i64> {
1827 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07001828 .into_iter()
1829 .next()
1830 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001831}
1832
Steven Richmanf32d0b42020-06-20 21:45:32 -07001833fn create_kvm(mem: GuestMemory) -> base::Result<KvmVm> {
1834 let kvm = Kvm::new()?;
1835 let vm = KvmVm::new(&kvm, mem)?;
1836 Ok(vm)
1837}
1838
1839fn create_kvm_kernel_irq_chip(
1840 vm: &KvmVm,
1841 vcpu_count: usize,
1842 _ioapic_device_socket: VmIrqRequestSocket,
1843) -> base::Result<impl IrqChipArch<KvmVcpu>> {
1844 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
1845 Ok(irq_chip)
1846}
1847
1848#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1849fn create_kvm_split_irq_chip(
1850 vm: &KvmVm,
1851 vcpu_count: usize,
1852 ioapic_device_socket: VmIrqRequestSocket,
1853) -> base::Result<impl IrqChipArch<KvmVcpu>> {
1854 let irq_chip = KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_socket)?;
1855 Ok(irq_chip)
1856}
1857
Dylan Reid059a1882018-07-23 17:58:09 -07001858pub fn run_config(cfg: Config) -> Result<()> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001859 if cfg.split_irqchip {
1860 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1861 {
1862 unimplemented!("KVM split irqchip mode only supported on x86 processors")
1863 }
1864
1865 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1866 {
1867 run_vm(cfg, create_kvm, create_kvm_split_irq_chip)
1868 }
1869 } else {
1870 run_vm(cfg, create_kvm, create_kvm_kernel_irq_chip)
1871 }
1872}
1873
1874fn run_vm<V, I, FV, FI>(cfg: Config, create_vm: FV, create_irq_chip: FI) -> Result<()>
1875where
1876 V: VmArch + 'static,
1877 I: IrqChipArch<V::Vcpu> + 'static,
1878 FV: FnOnce(GuestMemory) -> base::Result<V>,
1879 FI: FnOnce(
1880 &V,
1881 usize, // vcpu_count
1882 VmIrqRequestSocket, // ioapic_device_socket
1883 ) -> base::Result<I>,
1884{
Lepton Wu9105e9f2019-03-14 11:38:31 -07001885 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001886 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1887 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1888 // access to those files will not be possible.
1889 info!("crosvm entering multiprocess mode");
1890 }
1891
Jingkui Wang100e6e42019-03-08 20:41:57 -08001892 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001893 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001894 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1895 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1896 // quickly.
1897 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1898
David Tolnay2b089fc2019-03-04 15:33:22 -08001899 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1900 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001901 } else {
1902 None
1903 };
1904
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001905 let vm_image = match cfg.executable_path {
1906 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1907 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
1908 ),
1909 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1910 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
1911 ),
1912 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1913 };
1914
Dylan Reid059a1882018-07-23 17:58:09 -07001915 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08001916 memory_size: cfg
1917 .memory
1918 .unwrap_or(256)
1919 .checked_mul(1024 * 1024)
1920 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07001921 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001922 vcpu_affinity: cfg.vcpu_affinity.clone(),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001923 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001924 android_fstab: cfg
1925 .android_fstab
1926 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001927 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001928 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09001929 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001930 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001931 extra_kernel_params: cfg.params.clone(),
1932 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07001933 acpi_sdts: cfg
1934 .acpi_tables
1935 .iter()
1936 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
1937 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001938 rt_cpus: cfg.rt_cpus.clone(),
Dylan Reid059a1882018-07-23 17:58:09 -07001939 };
1940
Zach Reiznera60744b2019-02-13 17:33:32 -08001941 let control_server_socket = match &cfg.socket_path {
1942 Some(path) => Some(UnlinkUnixSeqpacketListener(
1943 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1944 )),
1945 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001946 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001947
1948 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001949 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07001950 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1951 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001952 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001953 let (balloon_host_socket, balloon_device_socket) =
Charles William Dick664cc3c2020-01-10 14:31:52 +09001954 msg_socket::pair::<BalloonControlCommand, BalloonControlResult>()
1955 .map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001956
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001957 // Create one control socket per disk.
1958 let mut disk_device_sockets = Vec::new();
1959 let mut disk_host_sockets = Vec::new();
1960 let disk_count = cfg.disks.len();
1961 for _ in 0..disk_count {
1962 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07001963 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
1964 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001965 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07001966 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001967 }
1968
Daniel Verkampe1980a92020-02-07 11:00:55 -08001969 let mut pmem_device_sockets = Vec::new();
1970 let pmem_count = cfg.pmem_devices.len();
1971 for _ in 0..pmem_count {
1972 let (pmem_host_socket, pmem_device_socket) =
1973 msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
1974 pmem_device_sockets.push(pmem_device_socket);
1975 control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
1976 }
1977
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001978 let (gpu_host_socket, gpu_device_socket) =
1979 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1980 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
1981
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08001982 let (ioapic_host_socket, ioapic_device_socket) =
1983 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1984 control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket));
1985
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001986 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
1987
Trent Begin17ccaad2019-04-17 13:51:25 -06001988 let linux = Arch::build_vm(
1989 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06001990 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001991 simple_jail(&cfg, "serial")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001992 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06001993 create_devices(
1994 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001995 mem,
1996 vm,
1997 sys_allocator,
1998 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001999 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002000 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002001 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06002002 balloon_device_socket,
2003 &mut disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08002004 &mut pmem_device_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002005 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002006 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06002007 )
2008 },
Steven Richmanf32d0b42020-06-20 21:45:32 -07002009 create_vm,
2010 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_socket),
Trent Begin17ccaad2019-04-17 13:51:25 -06002011 )
David Tolnaybe034262019-03-04 17:48:36 -08002012 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002013
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002014 run_control(
2015 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08002016 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002017 control_sockets,
2018 balloon_host_socket,
2019 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08002020 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002021 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002022 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002023 Arc::clone(&map_request),
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002024 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002025}
2026
Steven Richmanf32d0b42020-06-20 21:45:32 -07002027fn run_control<V: VmArch + 'static, I: IrqChipArch<V::Vcpu> + 'static>(
2028 mut linux: RunnableLinuxVm<V, I>,
Zach Reiznera60744b2019-02-13 17:33:32 -08002029 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002030 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07002031 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07002032 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08002033 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07002034 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002035 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002036 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reizner55a9e502018-10-03 10:22:32 -07002037) -> Result<()> {
David Tolnay5bbbf612018-12-01 17:49:30 -08002038 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002039
Zach Reizner5bed0d22018-03-28 02:31:11 -07002040 #[derive(PollToken)]
2041 enum Token {
2042 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002043 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002044 ChildSignal,
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002045 IrqFd { gsi: usize },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002046 BalanceMemory,
2047 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002048 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002049 VmControl { index: usize },
2050 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002051
Zach Reizner19ad1f32019-12-12 18:58:50 -08002052 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002053 .set_raw_mode()
2054 .expect("failed to set terminal raw mode");
2055
Zach Reiznerb2110be2019-07-23 15:55:03 -07002056 let poll_ctx = PollContext::build_with(&[
2057 (&linux.exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002058 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002059 (&sigchld_fd, Token::ChildSignal),
2060 ])
2061 .map_err(Error::PollContextAdd)?;
2062
Zach Reiznera60744b2019-02-13 17:33:32 -08002063 if let Some(socket_server) = &control_server_socket {
2064 poll_ctx
2065 .add(socket_server, Token::VmControlServer)
2066 .map_err(Error::PollContextAdd)?;
2067 }
Dylan Reid059a1882018-07-23 17:58:09 -07002068 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07002069 poll_ctx
2070 .add(socket.as_ref(), Token::VmControl { index })
2071 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002072 }
2073
Steven Richmanf32d0b42020-06-20 21:45:32 -07002074 let events = linux
2075 .irq_chip
2076 .irq_event_tokens()
2077 .map_err(Error::PollContextAdd)?;
2078
2079 for (gsi, evt) in events {
2080 poll_ctx
2081 .add(&evt, Token::IrqFd { gsi: gsi as usize })
2082 .map_err(Error::PollContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002083 }
2084
Charles William Dick0bf8a552019-10-29 15:36:01 +09002085 // Balance available memory between guest and host every second.
Michael Hoyle08d86a42020-08-19 14:45:21 -07002086 let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002087 if Path::new(LOWMEM_AVAILABLE).exists() {
2088 // Create timer request balloon stats every 1s.
2089 poll_ctx
2090 .add(&balancemem_timer, Token::BalanceMemory)
2091 .map_err(Error::PollContextAdd)?;
2092 let balancemem_dur = Duration::from_secs(1);
2093 let balancemem_int = Duration::from_secs(1);
2094 balancemem_timer
2095 .reset(balancemem_dur, Some(balancemem_int))
Michael Hoyle08d86a42020-08-19 14:45:21 -07002096 .map_err(Error::ResetTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002097
2098 // Listen for balloon statistics from the guest so we can balance.
2099 poll_ctx
2100 .add(&balloon_host_socket, Token::BalloonResult)
2101 .map_err(Error::PollContextAdd)?;
2102 } else {
2103 warn!("Unable to open low mem available, maybe not a chrome os kernel");
2104 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002105
Lepton Wu20333e42019-03-14 10:48:03 -07002106 if sandbox {
2107 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2108 drop_capabilities().map_err(Error::DropCapabilities)?;
2109 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002110
Steven Richmanf32d0b42020-06-20 21:45:32 -07002111 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2112 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002113 let run_mode_arc = Arc::new(VcpuRunMode::default());
Steven Richmanf32d0b42020-06-20 21:45:32 -07002114 let use_hypervisor_signals = !linux
2115 .vm
2116 .get_hypervisor()
2117 .check_capability(&HypervisorCap::ImmediateExit);
2118 setup_vcpu_signal_handler::<V::Vcpu>(use_hypervisor_signals)?;
2119
2120 let vcpus: Vec<Option<V::Vcpu>> = match linux.vcpus.take() {
2121 Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
2122 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2123 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002124 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Daniel Verkampc677fb42020-09-08 13:47:49 -07002125 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2126 Some(VcpuAffinity::Global(v)) => v,
2127 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2128 None => Default::default(),
2129 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002130 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002131 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002132 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002133 linux.vm.try_clone().map_err(Error::CloneEvent)?,
2134 linux.irq_chip.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002135 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002136 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002137 vcpu_affinity,
Zach Reizner55a9e502018-10-03 10:22:32 -07002138 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002139 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002140 linux.io_bus.clone(),
2141 linux.mmio_bus.clone(),
Michael Hoyle685316f2020-09-16 15:29:20 -07002142 linux.exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002143 linux.vm.check_capability(VmCap::PvClockSuspend),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002144 run_mode_arc.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002145 use_hypervisor_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07002146 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07002147 vcpu_handles.push(handle);
2148 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002149
Dylan Reid059a1882018-07-23 17:58:09 -07002150 vcpu_thread_barrier.wait();
2151
Zach Reizner39aa26b2017-12-12 18:03:23 -08002152 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002153 let events = {
2154 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002155 Ok(v) => v,
2156 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002157 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002158 break;
2159 }
2160 }
2161 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002162
Steven Richmanf32d0b42020-06-20 21:45:32 -07002163 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2164 warn!("can't deliver delayed irqs: {}", e);
2165 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002166
Zach Reiznera60744b2019-02-13 17:33:32 -08002167 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07002168 for event in events.iter_readable() {
2169 match event.token() {
2170 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002171 info!("vcpu requested shutdown");
2172 break 'poll;
2173 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002174 Token::Suspend => {
2175 info!("VM requested suspend");
2176 linux.suspend_evt.read().unwrap();
2177 run_mode_arc.set_and_notify(VmRunMode::Suspending);
2178 for handle in &vcpu_handles {
2179 let _ = handle.kill(SIGRTMIN() + 0);
2180 }
2181 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002182 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002183 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002184 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002185 let pid = siginfo.ssi_pid;
2186 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2187 Some(label) => format!("{} (pid {})", label, pid),
2188 None => format!("pid {}", pid),
2189 };
David Tolnayf5032762018-12-03 10:46:45 -08002190 error!(
2191 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002192 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002193 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002194 }
David Tolnayf5032762018-12-03 10:46:45 -08002195 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002196 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002197 Token::IrqFd { gsi } => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002198 if let Err(e) = linux.irq_chip.service_irq_event(gsi as u32) {
2199 error!("failed to signal irq {}: {}", gsi, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002200 }
2201 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002202 Token::BalanceMemory => {
Michael Hoyle08d86a42020-08-19 14:45:21 -07002203 balancemem_timer.wait().map_err(Error::Timer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002204 let command = BalloonControlCommand::Stats {};
2205 if let Err(e) = balloon_host_socket.send(&command) {
2206 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002207 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002208 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002209 Token::BalloonResult => {
2210 match balloon_host_socket.recv() {
2211 Ok(BalloonControlResult::Stats {
2212 stats,
2213 balloon_actual: balloon_actual_u,
2214 }) => {
2215 // Available memory is reported in MB, and we need bytes.
2216 let host_available = file_to_i64(LOWMEM_AVAILABLE)
2217 .map_err(Error::ReadMemAvailable)?
2218 << 20;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002219 let guest_free_u = if let Some(free) = stats.free_memory {
2220 free
Charles William Dick0bf8a552019-10-29 15:36:01 +09002221 } else {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002222 warn!("guest free_memory stat is missing");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002223 continue;
2224 };
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002225 let guest_cached_u = if let Some(cached) = stats.disk_caches {
2226 cached
2227 } else {
2228 warn!("guest disk_caches stat is missing");
2229 continue;
2230 };
2231 if guest_free_u > i64::max_value() as u64 {
2232 warn!("guest free memory is too large");
2233 continue;
2234 }
2235 if guest_cached_u > i64::max_value() as u64 {
2236 warn!("guest cached memory is too large");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002237 continue;
2238 }
2239 if balloon_actual_u > i64::max_value() as u64 {
2240 warn!("actual balloon size is too large");
2241 continue;
2242 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002243 // Tell the guest to change the balloon size if the target balloon size
2244 // is more than 5% different from the current balloon size.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002245 const RESIZE_PERCENT: i64 = 5;
2246 let balloon_actual = balloon_actual_u as i64;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002247 let guest_free = guest_free_u as i64;
2248 let guest_cached = guest_cached_u as i64;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002249 // Compute how much memory the guest should have available after we
2250 // rebalance.
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002251 let guest_available_target = host_available;
2252 let guest_available_delta =
2253 guest_available_target - guest_free - guest_cached;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002254 // How much do we have to change the balloon to balance.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002255 let balloon_target = max(balloon_actual - guest_available_delta, 0);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002256 // Compute the change in balloon size in percent. If the balloon size
2257 // is 0, use 1 so we don't overflow from the infinity % increase.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002258 let balloon_change_percent = (balloon_actual - balloon_target).abs()
2259 * 100
2260 / max(balloon_actual, 1);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002261
Charles William Dick0bf8a552019-10-29 15:36:01 +09002262 if balloon_change_percent >= RESIZE_PERCENT {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002263 info!("resizing balloon: host avail {}, guest free {} cached {} (target {}), balloon actual {} (target {})",
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002264 host_available,
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002265 guest_free,
2266 guest_cached,
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002267 guest_available_target,
2268 balloon_actual,
2269 balloon_target,
2270 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09002271 let command = BalloonControlCommand::Adjust {
2272 num_bytes: balloon_target as u64,
2273 };
2274 if let Err(e) = balloon_host_socket.send(&command) {
2275 warn!("failed to send memory value to balloon device: {}", e);
2276 }
2277 }
2278 }
2279 Err(e) => {
2280 error!("failed to recv BalloonControlResult: {}", e);
2281 }
2282 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002283 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002284 Token::VmControlServer => {
2285 if let Some(socket_server) = &control_server_socket {
2286 match socket_server.accept() {
2287 Ok(socket) => {
2288 poll_ctx
2289 .add(
2290 &socket,
2291 Token::VmControl {
2292 index: control_sockets.len(),
2293 },
2294 )
2295 .map_err(Error::PollContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002296 control_sockets
2297 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002298 }
2299 Err(e) => error!("failed to accept socket: {}", e),
2300 }
2301 }
2302 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002303 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08002304 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002305 match socket {
2306 TaggedControlSocket::Vm(socket) => match socket.recv() {
2307 Ok(request) => {
2308 let mut run_mode_opt = None;
2309 let response = request.execute(
2310 &mut run_mode_opt,
2311 &balloon_host_socket,
2312 disk_host_sockets,
2313 &usb_control_socket,
2314 );
2315 if let Err(e) = socket.send(&response) {
2316 error!("failed to send VmResponse: {}", e);
2317 }
2318 if let Some(run_mode) = run_mode_opt {
2319 info!("control socket changed run mode to {}", run_mode);
2320 match run_mode {
2321 VmRunMode::Exiting => {
2322 break 'poll;
2323 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002324 VmRunMode::Running => {
2325 if let VmRunMode::Suspending =
2326 *run_mode_arc.mtx.lock()
2327 {
2328 linux.io_bus.notify_resume();
2329 }
2330 run_mode_arc.set_and_notify(VmRunMode::Running);
2331 for handle in &vcpu_handles {
2332 let _ = handle.kill(SIGRTMIN() + 0);
2333 }
2334 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002335 other => {
2336 run_mode_arc.set_and_notify(other);
2337 for handle in &vcpu_handles {
2338 let _ = handle.kill(SIGRTMIN() + 0);
2339 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002340 }
2341 }
2342 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002343 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002344 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002345 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002346 vm_control_indices_to_remove.push(index);
2347 } else {
2348 error!("failed to recv VmRequest: {}", e);
2349 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002350 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002351 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07002352 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002353 Ok(request) => {
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002354 let response = request.execute(
2355 &mut linux.vm,
2356 &mut linux.resources,
2357 Arc::clone(&map_request),
2358 );
Jakub Starond99cd0a2019-04-11 14:09:39 -07002359 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002360 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002361 }
2362 }
2363 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002364 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002365 vm_control_indices_to_remove.push(index);
2366 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002367 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002368 }
2369 }
2370 },
Xiong Zhang2515b752019-09-19 10:29:02 +08002371 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
2372 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002373 let response = {
2374 let irq_chip = &mut linux.irq_chip;
2375 request.execute(
2376 |setup| match setup {
2377 IrqSetup::Event(irq, ev) => {
2378 irq_chip.register_irq_event(irq, ev, None)
2379 }
2380 IrqSetup::Route(route) => irq_chip.route_irq(route),
2381 },
2382 &mut linux.resources,
2383 )
2384 };
Xiong Zhang2515b752019-09-19 10:29:02 +08002385 if let Err(e) = socket.send(&response) {
2386 error!("failed to send VmIrqResponse: {}", e);
2387 }
2388 }
2389 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002390 if let MsgError::RecvZero = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002391 vm_control_indices_to_remove.push(index);
2392 } else {
2393 error!("failed to recv VmIrqRequest: {}", e);
2394 }
2395 }
2396 },
Daniel Verkampe1980a92020-02-07 11:00:55 -08002397 TaggedControlSocket::VmMsync(socket) => match socket.recv() {
2398 Ok(request) => {
2399 let response = request.execute(&mut linux.vm);
2400 if let Err(e) = socket.send(&response) {
2401 error!("failed to send VmMsyncResponse: {}", e);
2402 }
2403 }
2404 Err(e) => {
2405 if let MsgError::BadRecvSize { actual: 0, .. } = e {
2406 vm_control_indices_to_remove.push(index);
2407 } else {
2408 error!("failed to recv VmMsyncRequest: {}", e);
2409 }
2410 }
2411 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002412 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002413 }
2414 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002415 }
2416 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002417
Zach Reizner5bed0d22018-03-28 02:31:11 -07002418 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08002419 match event.token() {
2420 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002421 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002422 Token::ChildSignal => {}
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002423 Token::IrqFd { gsi: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09002424 Token::BalanceMemory => {}
2425 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002426 Token::VmControlServer => {}
2427 Token::VmControl { index } => {
2428 // It's possible more data is readable and buffered while the socket is hungup,
2429 // so don't delete the socket from the poll context until we're sure all the
2430 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07002431 match control_sockets
2432 .get(index)
2433 .map(|s| s.as_ref().get_readable_bytes())
2434 {
Zach Reiznera60744b2019-02-13 17:33:32 -08002435 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
2436 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
2437 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07002438 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002439 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002440 }
2441 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002442
2443 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002444 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002445 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002446 vm_control_indices_to_remove.dedup();
2447 for index in vm_control_indices_to_remove {
Zide Chen89584072019-11-14 10:33:51 -08002448 // Delete the socket from the `poll_ctx` synchronously. Otherwise, the kernel will do
2449 // this automatically when the FD inserted into the `poll_ctx` is closed after this
2450 // if-block, but this removal can be deferred unpredictably. In some instances where the
2451 // system is under heavy load, we can even get events returned by `poll_ctx` for an FD
2452 // that has already been closed. Because the token associated with that spurious event
2453 // now belongs to a different socket, the control loop will start to interact with
2454 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2455 // blocking on a socket that will never be ready. See also: crbug.com/1019986
2456 if let Some(socket) = control_sockets.get(index) {
2457 poll_ctx.delete(socket).map_err(Error::PollContextDelete)?;
2458 }
2459
2460 // This line implicitly drops the socket at `index` when it gets returned by
2461 // `swap_remove`. After this line, the socket at `index` is not the one from
2462 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
2463 // use `poll_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08002464 control_sockets.swap_remove(index);
2465 if let Some(socket) = control_sockets.get(index) {
2466 poll_ctx
Xiong Zhang44bb3dd2019-04-23 17:09:50 +08002467 .modify(
2468 socket,
2469 WatchingEvents::empty().set_read(),
2470 Token::VmControl { index },
2471 )
Zach Reiznera60744b2019-02-13 17:33:32 -08002472 .map_err(Error::PollContextAdd)?;
2473 }
2474 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002475 }
2476
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002477 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
2478 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07002479 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08002480 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002481 Ok(_) => {
2482 if let Err(e) = handle.join() {
2483 error!("failed to join vcpu thread: {:?}", e);
2484 }
2485 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08002486 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08002487 }
2488 }
2489
Daniel Verkamp94c35272019-09-12 13:31:30 -07002490 // Explicitly drop the VM structure here to allow the devices to clean up before the
2491 // control sockets are closed when this function exits.
2492 mem::drop(linux);
2493
Zach Reizner19ad1f32019-12-12 18:58:50 -08002494 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002495 .set_canon_mode()
2496 .expect("failed to restore canonical mode for terminal");
2497
2498 Ok(())
2499}