blob: 2e31efe699fdad1d416e771a46359f9686f63169 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07005use std::cmp::{max, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Michael Hoylea596a072020-11-10 19:32:45 -080020use std::os::unix::io::FromRawFd;
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070025use std::sync::{mpsc, Arc, Barrier};
26
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use std::thread;
28use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090029use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080030
David Tolnay41a6f842019-03-01 16:18:44 -080031use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080032
Tomasz Jeznach42644642020-05-20 23:27:59 -070033use acpi_tables::sdt::SDT;
34
Michael Hoyle6b196952020-08-02 20:09:41 -070035use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reizner65b98f12019-11-22 17:34:58 -080036#[cfg(feature = "gpu")]
37use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070038use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080039#[cfg(feature = "audio")]
40use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080041use devices::{
Steven Richman11dc6712020-09-02 15:39:14 -070042 self, HostBackendDeviceProvider, IrqChip, IrqEventIndex, KvmKernelIrqChip, PciDevice,
43 VcpuRunState, VfioContainer, VfioDevice, VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080044};
Steven Richmanf32d0b42020-06-20 21:45:32 -070045use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner304e7312020-09-29 16:00:24 -070046use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070047use minijail::{self, Minijail};
Zach Reiznera60744b2019-02-13 17:33:32 -080048use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080049use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070050use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080051use resources::{Alloc, MmioType, SystemAllocator};
Dylan Reidb0492662019-05-17 14:50:13 -070052use sync::Mutex;
Jakub Starona3411ea2019-04-24 10:55:25 -070053
Michael Hoyle6b196952020-08-02 20:09:41 -070054use base::{
David Tolnay633426a2019-04-12 12:18:35 -070055 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060056 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
Michael Hoylea596a072020-11-10 19:32:45 -080057 set_cpu_affinity, set_rt_prio_limit, set_rt_round_robin, signal, validate_raw_descriptor, warn,
58 AsRawDescriptor, Event, EventType, ExternalMapping, FlockOperation, FromRawDescriptor,
59 Killable, MemoryMappingArena, PollToken, Protection, RawDescriptor, ScopedEvent, SignalFd,
60 Terminal, Timer, WaitContext, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080061};
Jakub Starone7c59052019-04-09 12:31:14 -070062use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070063 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Charles William Dick664cc3c2020-01-10 14:31:52 +090064 BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket,
Keiichi Watanabec5262e92020-10-21 15:57:33 +090065 DiskControlResult, IrqSetup, UsbControlSocket, VcpuControl, VmControlResponseSocket,
66 VmIrqRequest, VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket,
67 VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse,
68 VmMsyncRequest, VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070069};
Keiichi Watanabec5262e92020-10-21 15:57:33 +090070#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
71use vm_control::{VcpuDebug, VcpuDebugStatus, VcpuDebugStatusMessage, VmRequest, VmResponse};
Dylan Reidec058d62020-07-20 20:21:11 -070072use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080073
Keiichi Watanabec5262e92020-10-21 15:57:33 +090074#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
75use crate::gdb::{gdb_thread, GdbStub};
Daniel Verkamp50740ce2020-02-28 12:36:56 -080076use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070077use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070078 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
79 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070080};
Sonny Raoed517d12018-02-13 22:09:43 -080081
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080082#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070083use {
84 aarch64::AArch64 as Arch,
Steven Richman11dc6712020-09-02 15:39:14 -070085 devices::IrqChipAArch64 as IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -070086 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
87};
Zach Reizner55a9e502018-10-03 10:22:32 -070088#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070089use {
Steven Richman11dc6712020-09-02 15:39:14 -070090 devices::{IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
91 hypervisor::{VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
Steven Richmanf32d0b42020-06-20 21:45:32 -070092 x86_64::X8664arch as Arch,
93};
Zach Reizner39aa26b2017-12-12 18:03:23 -080094
David Tolnay3df35522019-03-11 12:36:30 -070095#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070096#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080097pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070098 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070099 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700100 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -0800101 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -0700102 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800103 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700104 BlockDeviceNew(base::Error),
105 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -0800106 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700107 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700108 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700109 CloneVcpu(base::Error),
110 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000111 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800112 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700113 CreateConsole(arch::serial::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700114 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700115 CreateEvent(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700116 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800117 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800118 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700119 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800120 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800121 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700122 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800123 CreateVfioDevice(devices::vfio::VfioError),
Michael Hoylee392c462020-10-07 03:29:24 -0700124 CreateWaitContext(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700125 DeviceJail(minijail::Error),
126 DevicePivotRoot(minijail::Error),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800127 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700128 DiskImageLock(base::Error),
129 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900130 FsDeviceNew(virtio::fs::Error),
131 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700132 GetSignalMask(signal::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900133 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
134 HandleDebugCommand(<Arch as LinuxArch>::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800135 InputDeviceNew(virtio::InputError),
136 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700137 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700138 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700139 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800140 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800141 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800142 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700143 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800144 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700145 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800146 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800147 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800148 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800149 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900150 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800151 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700152 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700153 PmemDeviceNew(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900154 ReadMemAvailable(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700155 RegisterBalloon(arch::DeviceRegistrationError),
156 RegisterBlock(arch::DeviceRegistrationError),
157 RegisterGpu(arch::DeviceRegistrationError),
158 RegisterNet(arch::DeviceRegistrationError),
159 RegisterP9(arch::DeviceRegistrationError),
160 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700161 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700162 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700163 ReserveGpuMemory(base::MmapError),
164 ReserveMemory(base::Error),
165 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700166 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800167 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700168 RunnableVcpu(base::Error),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900169 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
170 SendDebugStatus(Box<mpsc::SendError<VcpuDebugStatusMessage>>),
Allen Webbf3024c82020-06-19 07:19:48 -0700171 SettingGidMap(minijail::Error),
172 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700173 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700174 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700175 SignalFd(base::SignalFdError),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900176 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
177 SpawnGdbServer(io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800178 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700179 Timer(base::Error),
Michael Hoylea596a072020-11-10 19:32:45 -0800180 ValidateRawDescriptor(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800181 VhostNetDeviceNew(virtio::vhost::Error),
182 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700183 VirtioPciDev(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700184 WaitContextAdd(base::Error),
185 WaitContextDelete(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700186 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800187}
188
David Tolnayc69f9752019-03-01 18:07:56 -0800189impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700190 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800191 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800192 use self::Error::*;
193
David Tolnay3df35522019-03-11 12:36:30 -0700194 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800195 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800196 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700197 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700198 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800199 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700200 AllocatePmemDeviceAddress(e) => {
201 write!(f, "failed to allocate memory for pmem device: {}", e)
202 }
David Tolnayc69f9752019-03-01 18:07:56 -0800203 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
204 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
205 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800206 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800207 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700208 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700209 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
210 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000211 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800212 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700213 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700214 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700215 CreateEvent(e) => write!(f, "failed to create event: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800216 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
217 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
218 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700219 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800220 CreateTpmStorage(p, e) => {
221 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
222 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800223 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700224 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800225 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700226 CreateWaitContext(e) => write!(f, "failed to create wait context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800227 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
228 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800229 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800230 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800231 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900232 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
233 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700234 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900235 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
236 HandleDebugCommand(e) => write!(f, "failed to handle a gdb command: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700237 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
238 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800239 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
240 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800241 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800242 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800243 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800244 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700245 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800246 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800247 f,
248 "failed to open android fstab file {}: {}",
249 p.display(),
250 e
251 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700252 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700253 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
254 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800255 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800256 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900257 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800258 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700259 PmemDeviceImageTooBig => {
260 write!(f, "failed to create pmem device: pmem device image too big")
261 }
262 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900263 ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800264 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
265 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
266 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
267 RegisterNet(e) => write!(f, "error registering net device: {}", e),
268 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
269 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
270 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
271 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800272 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
273 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700274 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700275 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800276 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700277 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900278 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
279 SendDebugStatus(e) => write!(f, "failed to send a debug status to GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800280 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900281 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700282 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800283 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
284 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
Keiichi Watanabec5262e92020-10-21 15:57:33 +0900285 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
286 SpawnGdbServer(e) => write!(f, "failed to spawn GDB thread: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800287 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700288 Timer(e) => write!(f, "failed to read timer fd: {}", e),
Michael Hoylea596a072020-11-10 19:32:45 -0800289 ValidateRawDescriptor(e) => write!(f, "failed to validate raw descriptor: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800290 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
291 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
292 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700293 WaitContextAdd(e) => write!(f, "failed to add descriptor to wait context: {}", e),
294 WaitContextDelete(e) => {
295 write!(f, "failed to remove descriptor from wait context: {}", e)
296 }
David Tolnayc69f9752019-03-01 18:07:56 -0800297 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800298 }
299 }
300}
301
Allen Webbf3024c82020-06-19 07:19:48 -0700302impl From<minijail::Error> for Error {
303 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800304 Error::IoJail(err)
305 }
306}
307
David Tolnayc69f9752019-03-01 18:07:56 -0800308impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700309
Zach Reizner39aa26b2017-12-12 18:03:23 -0800310type Result<T> = std::result::Result<T, Error>;
311
Jakub Starond99cd0a2019-04-11 14:09:39 -0700312enum TaggedControlSocket {
313 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700314 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800315 VmIrq(VmIrqResponseSocket),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800316 VmMsync(VmMsyncResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700317}
318
319impl AsRef<UnixSeqpacket> for TaggedControlSocket {
320 fn as_ref(&self) -> &UnixSeqpacket {
321 use self::TaggedControlSocket::*;
322 match &self {
Chirantan Ekbote50582532020-01-16 16:49:14 +0900323 Vm(ref socket) => socket.as_ref(),
324 VmMemory(ref socket) => socket.as_ref(),
325 VmIrq(ref socket) => socket.as_ref(),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800326 VmMsync(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700327 }
328 }
329}
330
Michael Hoylee392c462020-10-07 03:29:24 -0700331impl AsRawDescriptor for TaggedControlSocket {
332 fn as_raw_descriptor(&self) -> RawDescriptor {
Michael Hoylea596a072020-11-10 19:32:45 -0800333 self.as_ref().as_raw_descriptor()
Jakub Starond99cd0a2019-04-11 14:09:39 -0700334 }
335}
336
Andrew Walbranf50bab62020-07-07 13:22:53 +0100337fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900338 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900339
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900340 // Safe because this will only modify `buf` and we check the return value.
341 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
342 if res == 0 {
343 // Safe because the kernel guarantees that the struct is fully initialized.
344 let limit = unsafe { buf.assume_init() };
345 Ok(limit.rlim_max)
346 } else {
347 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
348 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900349}
350
/// Sandboxing options consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When true, the jail is configured with an empty capability set
    /// (`use_caps(0)`).
    limit_caps: bool,
    /// When true, seccomp failures are logged and the textual `.policy` file
    /// is used even if a pre-compiled `.bpf` file exists.
    log_failures: bool,
    /// Base path of the seccomp policy; its extension is replaced with `bpf`
    /// or `policy` when the policy is loaded.
    seccomp_policy: &'a Path,
    /// Optional uid map handed to minijail's `uidmap`.
    uid_map: Option<&'a str>,
    /// Optional gid map handed to minijail's `gidmap`.
    gid_map: Option<&'a str>,
}
358
Zach Reizner44863792019-06-26 14:22:08 -0700359fn create_base_minijail(
360 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800361 r_limit: Option<u64>,
362 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700363) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800364 // All child jails run in a new user namespace without any users mapped,
365 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800366 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800367
368 if let Some(config) = config {
369 j.namespace_pids();
370 j.namespace_user();
371 j.namespace_user_disable_setgroups();
372 if config.limit_caps {
373 // Don't need any capabilities.
374 j.use_caps(0);
375 }
376 if let Some(uid_map) = config.uid_map {
377 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
378 }
379 if let Some(gid_map) = config.gid_map {
380 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
381 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900382 // Run in a new mount namespace.
383 j.namespace_vfs();
384
Matt Delcoc24ad782020-02-14 13:24:36 -0800385 // Run in an empty network namespace.
386 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900387
388 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800389 j.no_new_privs();
390
391 // By default we'll prioritize using the pre-compiled .bpf over the .policy
392 // file (the .bpf is expected to be compiled using "trap" as the failure
393 // behavior instead of the default "kill" behavior).
394 // Refer to the code comment for the "seccomp-log-failures"
395 // command-line parameter for an explanation about why the |log_failures|
396 // flag forces the use of .policy files (and the build-time alternative to
397 // this run-time flag).
398 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
399 if bpf_policy_file.exists() && !config.log_failures {
400 j.parse_seccomp_program(&bpf_policy_file)
401 .map_err(Error::DeviceJail)?;
402 } else {
403 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
404 // which will correctly kill the entire device process if a worker
405 // thread commits a seccomp violation.
406 j.set_seccomp_filter_tsync();
407 if config.log_failures {
408 j.log_seccomp_filter_failures();
409 }
410 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
411 .map_err(Error::DeviceJail)?;
412 }
413 j.use_seccomp_filter();
414 // Don't do init setup.
415 j.run_as_init();
416 }
417
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900418 // Only pivot_root if we are not re-using the current root directory.
419 if root != Path::new("/") {
420 // It's safe to call `namespace_vfs` multiple times.
421 j.namespace_vfs();
422 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
423 }
Matt Delco45caf912019-11-13 08:11:09 -0800424
Matt Delcoc24ad782020-02-14 13:24:36 -0800425 // Most devices don't need to open many fds.
426 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
427 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
428 .map_err(Error::SettingMaxOpenFiles)?;
429
Zach Reizner39aa26b2017-12-12 18:03:23 -0800430 Ok(j)
431}
432
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800433fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700434 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800435 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
436 // A directory for a jailed device's pivot root.
437 let root_path = Path::new(pivot_root);
438 if !root_path.exists() {
439 return Err(Error::PivotRootDoesntExist(pivot_root));
440 }
441 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800442 let config = SandboxConfig {
443 limit_caps: true,
444 log_failures: cfg.seccomp_log_failures,
445 seccomp_policy: &policy_path,
446 uid_map: None,
447 gid_map: None,
448 };
449 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800450 } else {
451 Ok(None)
452 }
453}
454
David Tolnayfd0971d2019-03-04 17:15:57 -0800455type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800456
457fn create_block_device(
458 cfg: &Config,
459 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700460 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800461) -> DeviceResult {
462 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
463 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
464 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -0800465 unsafe { File::from_raw_descriptor(raw_descriptor_from_path(&disk.path)?) }
David Tolnay2b089fc2019-03-04 15:33:22 -0800466 } else {
467 OpenOptions::new()
468 .read(true)
469 .write(!disk.read_only)
470 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800471 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800472 };
473 // Lock the disk image to prevent other crosvm instances from using it.
474 let lock_op = if disk.read_only {
475 FlockOperation::LockShared
476 } else {
477 FlockOperation::LockExclusive
478 };
479 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
480
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700481 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800482 let dev = virtio::Block::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100483 virtio::base_features(cfg.protected_vm),
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800484 disk_file,
485 disk.read_only,
486 disk.sparse,
Daniel Verkamp27672232019-12-06 17:26:55 +1100487 disk.block_size,
Daniel Verkamp4e1f99a2020-06-01 12:47:21 -0700488 disk.id,
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800489 Some(disk_device_socket),
490 )
491 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800492
493 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700494 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800495 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800496 })
497}
498
499fn create_rng_device(cfg: &Config) -> DeviceResult {
Keiichi Watanabef70350b2020-11-24 21:57:53 +0900500 let dev =
501 virtio::Rng::new(virtio::base_features(cfg.protected_vm)).map_err(Error::RngDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800502
503 Ok(VirtioDeviceStub {
504 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800505 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800506 })
507}
508
/// Builds the virtio TPM device together with the storage directory used by the simulator.
///
/// With a jail, storage is a per-process directory `/run/vm/tpm.<pid>` that is created,
/// chowned to the ids returned by `add_crosvm_user_to_jail`, and bind-mounted writable into
/// the jail. Without a jail, storage is `/tmp/tpm-simulator`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use base::chown;
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                tmpfs_flags,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            // Each crosvm process gets its own storage directory, keyed by pid.
            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.to_owned(), e))?;

            // Hand the directory over to the ids the jailed device will use.
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
557
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800558fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800559 let socket = single_touch_spec
560 .get_path()
561 .into_unix_stream()
562 .map_err(|e| {
563 error!("failed configuring virtio single touch: {:?}", e);
564 e
565 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800566
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800567 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700568 let dev = virtio::new_single_touch(
569 socket,
570 width,
571 height,
572 virtio::base_features(cfg.protected_vm),
573 )
574 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800575 Ok(VirtioDeviceStub {
576 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800577 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800578 })
579}
580
581fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800582 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800583 error!("failed configuring virtio trackpad: {}", e);
584 e
585 })?;
586
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800587 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700588 let dev = virtio::new_trackpad(
589 socket,
590 width,
591 height,
592 virtio::base_features(cfg.protected_vm),
593 )
594 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800595
596 Ok(VirtioDeviceStub {
597 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800598 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800599 })
600}
601
Zach Reizner65b98f12019-11-22 17:34:58 -0800602fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
603 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800604 error!("failed configuring virtio mouse: {}", e);
605 e
606 })?;
607
Noah Goldd4ca29b2020-10-27 12:21:52 -0700608 let dev = virtio::new_mouse(socket, virtio::base_features(cfg.protected_vm))
609 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800610
611 Ok(VirtioDeviceStub {
612 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800613 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800614 })
615}
616
Zach Reizner65b98f12019-11-22 17:34:58 -0800617fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
618 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800619 error!("failed configuring virtio keyboard: {}", e);
620 e
621 })?;
622
Noah Goldd4ca29b2020-10-27 12:21:52 -0700623 let dev = virtio::new_keyboard(socket, virtio::base_features(cfg.protected_vm))
624 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800625
626 Ok(VirtioDeviceStub {
627 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800628 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800629 })
630}
631
632fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
633 let dev_file = OpenOptions::new()
634 .read(true)
635 .write(true)
636 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800637 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800638
Noah Goldd4ca29b2020-10-27 12:21:52 -0700639 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
640 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800641
642 Ok(VirtioDeviceStub {
643 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800644 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800645 })
646}
647
Jakub Staron1f828d72019-04-11 12:49:29 -0700648fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100649 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), socket)
650 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800651
652 Ok(VirtioDeviceStub {
653 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800654 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800655 })
656}
657
Michael Hoylea596a072020-11-10 19:32:45 -0800658fn create_tap_net_device(cfg: &Config, tap_fd: RawDescriptor) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800659 // Safe because we ensure that we get a unique handle to the fd.
660 let tap = unsafe {
Michael Hoylea596a072020-11-10 19:32:45 -0800661 Tap::from_raw_descriptor(
662 validate_raw_descriptor(tap_fd).map_err(Error::ValidateRawDescriptor)?,
663 )
664 .map_err(Error::CreateTapDevice)?
David Tolnay2b089fc2019-03-04 15:33:22 -0800665 };
666
Xiong Zhang773c7072020-03-20 10:39:55 +0800667 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
668 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700669 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800670 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
671 vq_pairs = 1;
672 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100673 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100674 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800675
676 Ok(VirtioDeviceStub {
677 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800678 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800679 })
680}
681
682fn create_net_device(
683 cfg: &Config,
684 host_ip: Ipv4Addr,
685 netmask: Ipv4Addr,
686 mac_address: MacAddress,
687 mem: &GuestMemory,
688) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800689 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
690 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700691 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800692 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
693 vq_pairs = 1;
694 }
695
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100696 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800697 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100698 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
699 features,
700 host_ip,
701 netmask,
702 mac_address,
703 mem,
704 )
705 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800706 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800707 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100708 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800709 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800710 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800711 };
712
713 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800714 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800715 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800716 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800717 };
718
719 Ok(VirtioDeviceStub {
720 dev,
721 jail: simple_jail(&cfg, policy)?,
722 })
723}
724
/// Builds the virtio GPU device and, when sandboxing is enabled, the jail it runs in.
///
/// Display backends are tried in the order Wayland (only when `wayland_socket_path` is given),
/// X, then the stub backend. Under a sandbox the Wayland backend is pointed at `/wayland-0`,
/// the location the host socket is bind-mounted to inside the jail.
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    // Wayland, when available, goes ahead of the X and stub backends.
    let mut display_backends = Vec::with_capacity(3);
    if let Some(socket_path) = wayland_socket_path {
        let visible_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            socket_path.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(visible_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let exit_evt_clone = exit_evt.try_clone().map_err(Error::CloneEvent)?;
    let dev = virtio::Gpu::new(
        exit_evt_clone,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
    );

    let mut jail = simple_jail(cfg, "gpu_device")?;
    if let Some(gpu_jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        gpu_jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
            let sysfs_path = Path::new(sysfs_dir);
            gpu_jail.mount_bind(sysfs_path, sysfs_path, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            gpu_jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        if let Some(params) = cfg.gpu_parameters.as_ref() {
            if let Some(cache_dir) = params.cache_path.as_ref() {
                if cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox {
                    warn!("shader caching not yet supported on ARM with sandbox enabled");
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
                } else {
                    env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                    env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                    if let Some(cache_size) = params.cache_size.as_ref() {
                        env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                    }
                    let shadercache_path = Path::new(cache_dir);
                    gpu_jail.mount_bind(shadercache_path, shadercache_path, true)?;
                }
            }
        }

        // If the ARM specific devices exist on the host, bind mount them in.
        for arm_dev in &["/dev/mali0", "/dev/pvr_sync"] {
            let arm_dev_path = Path::new(arm_dev);
            if arm_dev_path.exists() {
                gpu_jail.mount_bind(arm_dev_path, arm_dev_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        for lib_dir in &["/usr/lib", "/usr/lib64", "/lib", "/lib64"] {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                gpu_jail.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(host_socket) = wayland_socket_path {
            gpu_jail.mount_bind(host_socket, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(gpu_jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        gpu_jail.mount(proc_path, proc_path, "proc", proc_flags)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
861
862fn create_wayland_device(
863 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700864 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800865 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
866) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900867 let wayland_socket_dirs = cfg
868 .wayland_socket_paths
869 .iter()
870 .map(|(_name, path)| path.parent())
871 .collect::<Option<Vec<_>>>()
872 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800873
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100874 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100875 let dev = virtio::Wl::new(
876 features,
877 cfg.wayland_socket_paths.clone(),
878 socket,
879 resource_bridge,
880 )
881 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800882
Matt Delco45caf912019-11-13 08:11:09 -0800883 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800884 Some(mut jail) => {
885 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
886 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
887 jail.mount_with_data(
888 Path::new("none"),
889 Path::new("/"),
890 "tmpfs",
891 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
892 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800893 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800894
895 // Bind mount the wayland socket's directory into jail's root. This is necessary since
896 // each new wayland context must open() the socket. If the wayland socket is ever
897 // destroyed and remade in the same host directory, new connections will be possible
898 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900899 for dir in &wayland_socket_dirs {
900 jail.mount_bind(dir, dir, true)?;
901 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800902 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
903
904 Some(jail)
905 }
906 None => None,
907 };
908
909 Ok(VirtioDeviceStub {
910 dev: Box::new(dev),
911 jail,
912 })
913}
914
/// Builds a virtio video device of kind `typ` (decoder or encoder) connected to the GPU through
/// `resource_bridge`.
///
/// When sandboxing is enabled the jail is given the crosvm user for the chosen device kind and
/// the bind mounts listed below.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: virtio::resource_bridge::ResourceRequestSocket,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(video_jail) = jail.as_mut() {
        let crosvm_user = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(video_jail, crosvm_user)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        video_jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        video_jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices.
            for sysfs_dir in &["/sys/dev/char", "/sys/devices"] {
                let sysfs_path = Path::new(sysfs_dir);
                video_jail.mount_bind(sysfs_path, sysfs_path, false)?;
            }

            // Required for loading dri libraries loaded by minigbm on AMD devices.
            let lib_dir = Path::new("/usr/lib64");
            video_jail.mount_bind(lib_dir, lib_dir, false)?;
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        video_jail.mount_bind(urandom, urandom, false)?;
        let dbus_socket = Path::new("/run/dbus/system_bus_socket");
        video_jail.mount_bind(dbus_socket, dbus_socket, true)?;
    }

    let features = virtio::base_features(cfg.protected_vm);
    let video = devices::virtio::VideoDevice::new(features, typ, Some(resource_bridge));

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
978
/// Creates a video device of kind `typ` and appends it to `devs`.
///
/// A resource bridge pair is created first. The GPU-side end is pushed onto `resource_bridges`
/// before the device is constructed, so it stays there even if construction then fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    resource_bridges: &mut Vec<virtio::resource_bridge::ResourceResponseSocket>,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let bridge = virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
    let (video_end, gpu_end) = bridge;
    resource_bridges.push(gpu_end);

    let video_dev = create_video_device(cfg, typ, video_end)?;
    devs.push(video_dev);
    Ok(())
}
992
David Tolnay2b089fc2019-03-04 15:33:22 -0800993fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100994 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100995 let dev = virtio::vhost::Vsock::new(features, cid, mem).map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800996
997 Ok(VirtioDeviceStub {
998 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800999 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001000 })
1001}
1002
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001003fn create_fs_device(
1004 cfg: &Config,
1005 uid_map: &str,
1006 gid_map: &str,
1007 src: &Path,
1008 tag: &str,
1009 fs_cfg: virtio::fs::passthrough::Config,
1010) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001011 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -08001012 let j = if cfg.sandbox {
1013 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
1014 let config = SandboxConfig {
1015 limit_caps: false,
1016 uid_map: Some(uid_map),
1017 gid_map: Some(gid_map),
1018 log_failures: cfg.seccomp_log_failures,
1019 seccomp_policy: &seccomp_policy,
1020 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001021 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1022 // We want bind mounts from the parent namespaces to propagate into the fs device's
1023 // namespace.
1024 jail.set_remount_mode(libc::MS_SLAVE);
1025
1026 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001027 } else {
1028 create_base_minijail(src, Some(max_open_files), None)?
1029 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001030
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001031 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001032 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1033 // when num_queues > 1.
Will Deacon81d5adb2020-10-06 18:37:48 +01001034 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001035
1036 Ok(VirtioDeviceStub {
1037 dev: Box::new(dev),
1038 jail: Some(j),
1039 })
1040}
1041
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001042fn create_9p_device(
1043 cfg: &Config,
1044 uid_map: &str,
1045 gid_map: &str,
1046 src: &Path,
1047 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001048 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001049) -> DeviceResult {
1050 let max_open_files = get_max_open_files()?;
1051 let (jail, root) = if cfg.sandbox {
1052 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1053 let config = SandboxConfig {
1054 limit_caps: false,
1055 uid_map: Some(uid_map),
1056 gid_map: Some(gid_map),
1057 log_failures: cfg.seccomp_log_failures,
1058 seccomp_policy: &seccomp_policy,
1059 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001060
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001061 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1062 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1063 // namespace.
1064 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001065
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001066 // The shared directory becomes the root of the device's file system.
1067 let root = Path::new("/");
1068 (Some(jail), root)
1069 } else {
1070 // There's no mount namespace so we tell the server to treat the source directory as the
1071 // root.
1072 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001073 };
1074
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001075 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001076 p9_cfg.root = root.into();
1077 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001078
1079 Ok(VirtioDeviceStub {
1080 dev: Box::new(dev),
1081 jail,
1082 })
1083}
1084
Jakub Starona3411ea2019-04-24 10:55:25 -07001085fn create_pmem_device(
1086 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001087 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001088 resources: &mut SystemAllocator,
1089 disk: &DiskOption,
1090 index: usize,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001091 pmem_device_socket: VmMsyncRequestSocket,
Jakub Starona3411ea2019-04-24 10:55:25 -07001092) -> DeviceResult {
1093 let fd = OpenOptions::new()
1094 .read(true)
1095 .write(!disk.read_only)
1096 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001097 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001098
Iliyan Malcheved149862020-04-17 23:57:47 +00001099 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001100 let metadata =
1101 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001102 let disk_len = metadata.len();
1103 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1104 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1105 // we just align the size of the file to 2 MiB then access beyond the last page of the
1106 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1107 // padding up to 2 MiB.
1108 let alignment = 2 * 1024 * 1024;
1109 let align_adjust = if disk_len % alignment != 0 {
1110 alignment - (disk_len % alignment)
1111 } else {
1112 0
1113 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001114 disk_len
1115 .checked_add(align_adjust)
1116 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001117 };
1118
1119 let protection = {
1120 if disk.read_only {
1121 Protection::read()
1122 } else {
1123 Protection::read_write()
1124 }
1125 };
1126
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001127 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001128 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001129 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001130
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001131 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1132 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001133 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001134 .map_err(Error::ReservePmemMemory)?;
1135 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001136 };
1137
1138 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001139 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001140 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001141 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001142 Alloc::PmemDevice(index),
1143 format!("pmem_disk_image_{}", index),
1144 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1145 128 * 1024 * 1024, /* 128 MiB */
1146 )
1147 .map_err(Error::AllocatePmemDeviceAddress)?;
1148
Daniel Verkampe1980a92020-02-07 11:00:55 -08001149 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001150 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001151 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001152 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001153 /* read_only = */ disk.read_only,
1154 /* log_dirty_pages = */ false,
1155 )
1156 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001157
Daniel Verkampe1980a92020-02-07 11:00:55 -08001158 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001159 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001160 fd,
1161 GuestAddress(mapping_address),
1162 slot,
1163 arena_size,
1164 Some(pmem_device_socket),
1165 )
1166 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001167
1168 Ok(VirtioDeviceStub {
1169 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001170 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001171 })
1172}
1173
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001174fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001175 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001176 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001177 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001178 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001179 .map_err(Error::CreateConsole)?;
1180
Nicholas Verne71e73d82020-07-08 17:19:55 +10001181 let jail = match simple_jail(&cfg, "serial")? {
1182 Some(mut jail) => {
1183 // Create a tmpfs in the device's root directory so that we can bind mount the
1184 // log socket directory into it.
1185 // The size=67108864 is size=64*1024*1024 or size=64MB.
1186 jail.mount_with_data(
1187 Path::new("none"),
1188 Path::new("/"),
1189 "tmpfs",
1190 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1191 "size=67108864",
1192 )?;
1193 add_crosvm_user_to_jail(&mut jail, "serial")?;
1194 let res = param.add_bind_mounts(&mut jail);
1195 if res.is_err() {
1196 error!("failed to add bind mounts for console device");
1197 }
1198 Some(jail)
1199 }
1200 None => None,
1201 };
1202
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001203 Ok(VirtioDeviceStub {
1204 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001205 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001206 })
1207}
1208
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001209// gpu_device_socket is not used when GPU support is disabled.
1210#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001211fn create_virtio_devices(
1212 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -07001213 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001214 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001215 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001216 _exit_evt: &Event,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001217 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001218 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001219 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001220 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001221 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001222 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001223) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001224 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001225
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001226 for (_, param) in cfg
1227 .serial_parameters
1228 .iter()
1229 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1230 {
1231 let dev = create_console_device(cfg, param)?;
1232 devs.push(dev);
1233 }
1234
Zach Reizner8fb52112017-12-13 16:04:39 -08001235 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001236 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -08001237 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001238 }
1239
Jakub Starona3411ea2019-04-24 10:55:25 -07001240 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Daniel Verkampe1980a92020-02-07 11:00:55 -08001241 let pmem_device_socket = pmem_device_sockets.remove(0);
1242 devs.push(create_pmem_device(
1243 cfg,
1244 vm,
1245 resources,
1246 pmem_disk,
1247 index,
1248 pmem_device_socket,
1249 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001250 }
1251
David Tolnay2b089fc2019-03-04 15:33:22 -08001252 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001253
David Tolnayde6b29a2018-12-20 11:49:46 -08001254 #[cfg(feature = "tpm")]
1255 {
David Tolnay43f8e212019-02-13 17:28:16 -08001256 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001257 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001258 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001259 }
1260
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001261 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
1262 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
1263 }
1264
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001265 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -08001266 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001267 }
1268
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001269 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -08001270 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001271 }
1272
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001273 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -08001274 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001275 }
1276
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001277 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -08001278 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001279 }
1280
David Tolnay2b089fc2019-03-04 15:33:22 -08001281 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001282
Zach Reizner39aa26b2017-12-12 18:03:23 -08001283 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001284 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001285 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001286 }
1287
David Tolnay2b089fc2019-03-04 15:33:22 -08001288 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1289 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1290 {
1291 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001292 }
1293
David Tolnayfa701712019-02-13 16:42:54 -08001294 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001295 let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();
1296
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001297 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001298 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
1299 let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;
1300
1301 #[cfg(feature = "gpu")]
1302 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001303 if cfg.gpu_parameters.is_some() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001304 let (wl_socket, gpu_socket) =
1305 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
1306 resource_bridges.push(gpu_socket);
1307 wl_resource_bridge = Some(wl_socket);
1308 }
1309 }
1310
1311 devs.push(create_wayland_device(
1312 cfg,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001313 wayland_device_socket,
1314 wl_resource_bridge,
1315 )?);
1316 }
David Tolnayfa701712019-02-13 16:42:54 -08001317
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001318 #[cfg(feature = "video-decoder")]
1319 {
1320 if cfg.video_dec {
1321 register_video_device(
1322 &mut devs,
1323 &mut resource_bridges,
1324 cfg,
1325 devices::virtio::VideoDeviceType::Decoder,
1326 )?;
1327 }
1328 }
1329
1330 #[cfg(feature = "video-encoder")]
1331 {
1332 if cfg.video_enc {
1333 register_video_device(
1334 &mut devs,
1335 &mut resource_bridges,
1336 cfg,
1337 devices::virtio::VideoDeviceType::Encoder,
1338 )?;
1339 }
1340 }
1341
Zach Reizner3a8100a2017-09-13 19:15:43 -07001342 #[cfg(feature = "gpu")]
1343 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001344 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Zach Reizner65b98f12019-11-22 17:34:58 -08001345 let mut event_devices = Vec::new();
1346 if cfg.display_window_mouse {
1347 let (event_device_socket, virtio_dev_socket) =
1348 UnixStream::pair().map_err(Error::CreateSocket)?;
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001349 let (single_touch_width, single_touch_height) = cfg
1350 .virtio_single_touch
1351 .as_ref()
1352 .map(|single_touch_spec| single_touch_spec.get_size())
Noah Golddc7f52b2020-02-01 13:01:58 -08001353 .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001354 let dev = virtio::new_single_touch(
1355 virtio_dev_socket,
1356 single_touch_width,
1357 single_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001358 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001359 )
1360 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001361 devs.push(VirtioDeviceStub {
1362 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001363 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001364 });
1365 event_devices.push(EventDevice::touchscreen(event_device_socket));
1366 }
1367 if cfg.display_window_keyboard {
1368 let (event_device_socket, virtio_dev_socket) =
1369 UnixStream::pair().map_err(Error::CreateSocket)?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001370 let dev = virtio::new_keyboard(
1371 virtio_dev_socket,
1372 virtio::base_features(cfg.protected_vm),
1373 )
1374 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001375 devs.push(VirtioDeviceStub {
1376 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001377 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001378 });
1379 event_devices.push(EventDevice::keyboard(event_device_socket));
1380 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001381 devs.push(create_gpu_device(
1382 cfg,
1383 _exit_evt,
1384 gpu_device_socket,
1385 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001386 // Use the unnamed socket for GPU display screens.
1387 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001388 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001389 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001390 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001391 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001392 }
1393 }
1394
Zach Reizneraa575662018-08-15 10:46:32 -07001395 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001396 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001397 }
1398
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001399 for shared_dir in &cfg.shared_dirs {
1400 let SharedDir {
1401 src,
1402 tag,
1403 kind,
1404 uid_map,
1405 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001406 fs_cfg,
1407 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001408 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001409
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001410 let dev = match kind {
1411 SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001412 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001413 };
1414 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001415 }
1416
1417 Ok(devs)
1418}
1419
1420fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001421 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001422 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001423 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001424 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001425 exit_evt: &Event,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001426 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001427 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001428 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001429 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001430 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001431 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001432 usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001433 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001434) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001435 let stubs = create_virtio_devices(
1436 &cfg,
1437 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001438 vm,
1439 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001440 exit_evt,
1441 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001442 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001443 balloon_device_socket,
1444 disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001445 pmem_device_sockets,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001446 map_request,
David Tolnay2b089fc2019-03-04 15:33:22 -08001447 )?;
1448
1449 let mut pci_devices = Vec::new();
1450
1451 for stub in stubs {
Daniel Verkampbb712d62019-11-19 09:47:33 -08001452 let (msi_host_socket, msi_device_socket) =
1453 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1454 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1455 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
1456 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001457 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001458 pci_devices.push((dev, stub.jail));
1459 }
1460
Andrew Scull1590e6f2020-03-18 18:00:47 +00001461 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001462 for ac97_param in &cfg.ac97_parameters {
1463 let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
paulhsiace17e6e2020-08-28 18:37:45 +08001464 let jail = simple_jail(&cfg, dev.minijail_policy())?;
1465 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001466 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001467
Jingkui Wang100e6e42019-03-08 20:41:57 -08001468 // Create xhci controller.
1469 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001470 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001471
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001472 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001473 let vfio_container = Arc::new(Mutex::new(
1474 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1475 ));
1476
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001477 for vfio_path in &cfg.vfio {
Daniel Verkamp10154a92020-09-28 17:44:40 -07001478 // create MSI, MSI-X, and Mem request sockets for each vfio device
1479 let (vfio_host_socket_msi, vfio_device_socket_msi) =
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001480 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
Daniel Verkamp10154a92020-09-28 17:44:40 -07001481 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msi));
1482
1483 let (vfio_host_socket_msix, vfio_device_socket_msix) =
1484 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1485 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msix));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001486
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001487 let (vfio_host_socket_mem, vfio_device_socket_mem) =
1488 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>()
1489 .map_err(Error::CreateSocket)?;
1490 control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001491
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001492 let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
1493 .map_err(Error::CreateVfioDevice)?;
1494 let vfiopcidevice = Box::new(VfioPciDevice::new(
1495 vfiodevice,
Daniel Verkamp10154a92020-09-28 17:44:40 -07001496 vfio_device_socket_msi,
1497 vfio_device_socket_msix,
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001498 vfio_device_socket_mem,
1499 ));
1500 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1501 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001502 }
1503
David Tolnay2b089fc2019-03-04 15:33:22 -08001504 Ok(pci_devices)
1505}
1506
1507#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001508#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001509struct Ids {
1510 uid: uid_t,
1511 gid: gid_t,
1512}
1513
David Tolnay48c48292019-03-01 16:54:25 -08001514// Set the uid/gid for the jailed process and give a basic id map. This is
1515// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001516fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001517 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1518
1519 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1520 Ok(u) => u,
1521 Err(e) => {
1522 warn!("falling back to current user id for {}: {}", feature, e);
1523 geteuid()
1524 }
1525 };
1526
1527 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1528 Ok(u) => u,
1529 Err(e) => {
1530 warn!("falling back to current group id for {}: {}", feature, e);
1531 getegid()
1532 }
1533 };
1534
1535 jail.change_uid(crosvm_uid);
1536 jail.change_gid(crosvm_gid);
1537 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1538 .map_err(Error::SettingUidMap)?;
1539 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1540 .map_err(Error::SettingGidMap)?;
1541
David Tolnay41a6f842019-03-01 16:18:44 -08001542 Ok(Ids {
1543 uid: crosvm_uid,
1544 gid: crosvm_gid,
1545 })
David Tolnay48c48292019-03-01 16:54:25 -08001546}
1547
Michael Hoylea596a072020-11-10 19:32:45 -08001548fn raw_descriptor_from_path(path: &Path) -> Result<RawDescriptor> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001549 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001550 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001551 }
Michael Hoylea596a072020-11-10 19:32:45 -08001552 let raw_descriptor = path
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001553 .file_name()
1554 .and_then(|fd_osstr| fd_osstr.to_str())
1555 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1556 .ok_or(Error::InvalidFdPath)?;
Michael Hoylea596a072020-11-10 19:32:45 -08001557 validate_raw_descriptor(raw_descriptor).map_err(Error::ValidateRawDescriptor)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001558}
1559
Zach Reizner65b98f12019-11-22 17:34:58 -08001560trait IntoUnixStream {
1561 fn into_unix_stream(self) -> Result<UnixStream>;
1562}
1563
1564impl<'a> IntoUnixStream for &'a Path {
1565 fn into_unix_stream(self) -> Result<UnixStream> {
1566 if self.parent() == Some(Path::new("/proc/self/fd")) {
1567 // Safe because we will validate |raw_fd|.
Michael Hoylea596a072020-11-10 19:32:45 -08001568 unsafe { Ok(UnixStream::from_raw_fd(raw_descriptor_from_path(self)?)) }
Zach Reizner65b98f12019-11-22 17:34:58 -08001569 } else {
1570 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1571 }
1572 }
1573}
1574impl<'a> IntoUnixStream for &'a PathBuf {
1575 fn into_unix_stream(self) -> Result<UnixStream> {
1576 self.as_path().into_unix_stream()
1577 }
1578}
1579
1580impl IntoUnixStream for UnixStream {
1581 fn into_unix_stream(self) -> Result<UnixStream> {
1582 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001583 }
1584}
1585
Steven Richmanf32d0b42020-06-20 21:45:32 -07001586fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1587 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001588 unsafe {
1589 extern "C" fn handle_signal() {}
1590 // Our signal handler does nothing and is trivially async signal safe.
1591 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1592 .map_err(Error::RegisterSignalHandler)?;
1593 }
1594 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1595 } else {
1596 unsafe {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001597 extern "C" fn handle_signal<T: Vcpu>() {
1598 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001599 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001600 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001601 .map_err(Error::RegisterSignalHandler)?;
1602 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001603 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001604 Ok(())
1605}
1606
Steven Richmanf32d0b42020-06-20 21:45:32 -07001607// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001608fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001609 cpu_id: usize,
1610 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001611 vm: impl VmArch,
1612 irq_chip: &mut impl IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001613 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001614 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001615 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001616 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001617 has_bios: bool,
1618 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001619) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001620where
Zach Reizner2c770e62020-09-30 16:49:59 -07001621 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001622{
Zach Reizner304e7312020-09-29 16:00:24 -07001623 let mut vcpu = match vcpu {
1624 Some(v) => v,
1625 None => {
1626 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1627 // the vcpu thread.
1628 match vm
1629 .create_vcpu(cpu_id)
1630 .map_err(Error::CreateVcpu)?
1631 .downcast::<V>()
1632 {
1633 Ok(v) => *v,
1634 Err(_) => panic!("VM created wrong type of VCPU"),
1635 }
1636 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001637 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001638
Steven Richmanf32d0b42020-06-20 21:45:32 -07001639 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001640 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001641 .map_err(Error::AddIrqChipVcpu)?;
1642
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001643 if !vcpu_affinity.is_empty() {
1644 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1645 error!("Failed to set CPU affinity: {}", e);
1646 }
1647 }
1648
Steven Richmanf32d0b42020-06-20 21:45:32 -07001649 Arch::configure_vcpu(
1650 vm.get_memory(),
1651 vm.get_hypervisor(),
1652 irq_chip,
1653 &mut vcpu,
1654 cpu_id,
1655 vcpu_count,
1656 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001657 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001658 )
1659 .map_err(Error::ConfigureVcpu)?;
1660
Steven Richmanf32d0b42020-06-20 21:45:32 -07001661 #[cfg(feature = "chromeos")]
1662 if let Err(e) = base::sched::enable_core_scheduling() {
1663 error!("Failed to enable core scheduling: {}", e);
1664 }
1665
Kansho Nishidaab205af2020-08-13 18:17:50 +09001666 if run_rt {
1667 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1668 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1669 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1670 {
1671 warn!("Failed to set vcpu to real time: {}", e);
1672 }
1673 }
1674
Steven Richmanf32d0b42020-06-20 21:45:32 -07001675 if use_hypervisor_signals {
1676 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1677 v.retain(|&x| x != SIGRTMIN() + 0);
1678 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1679 }
1680
Zach Reizner2c770e62020-09-30 16:49:59 -07001681 let vcpu_run_handle = vcpu
1682 .take_run_handle(Some(SIGRTMIN() + 0))
1683 .map_err(Error::RunnableVcpu)?;
1684
1685 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001686}
1687
// Executes one debug command `d` against `vcpu` and sends the resulting status, tagged with
// `cpu_id`, over `reply_channel`.
//
// Returns `Error::HandleDebugCommand` if the architecture-specific operation fails (no reply is
// sent in that case) and `Error::SendDebugStatus` if the reply cannot be sent. A failed memory
// read is not an error: it is reported as an empty memory region.
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
fn handle_debug_msg<V>(
    cpu_id: usize,
    vcpu: &V,
    guest_mem: &GuestMemory,
    d: VcpuDebug,
    reply_channel: &mpsc::Sender<VcpuDebugStatusMessage>,
) -> Result<()>
where
    V: VcpuArch + 'static,
{
    // Run the command first; only the status payload differs between commands.
    let status = match d {
        VcpuDebug::ReadRegs => VcpuDebugStatus::RegValues(
            Arch::debug_read_registers(vcpu).map_err(Error::HandleDebugCommand)?,
        ),
        VcpuDebug::WriteRegs(regs) => {
            Arch::debug_write_registers(vcpu, &regs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::ReadMem(vaddr, len) => VcpuDebugStatus::MemoryRegion(
            // Read errors are deliberately swallowed and reported as an empty region.
            Arch::debug_read_memory(vcpu, guest_mem, vaddr, len).unwrap_or_default(),
        ),
        VcpuDebug::WriteMem(vaddr, buf) => {
            Arch::debug_write_memory(vcpu, guest_mem, vaddr, &buf)
                .map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::EnableSinglestep => {
            Arch::debug_enable_singlestep(vcpu).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
        VcpuDebug::SetHwBreakPoint(addrs) => {
            Arch::debug_set_hw_breakpoints(vcpu, &addrs).map_err(Error::HandleDebugCommand)?;
            VcpuDebugStatus::CommandComplete
        }
    };

    reply_channel
        .send(VcpuDebugStatusMessage {
            cpu: cpu_id,
            msg: status,
        })
        .map_err(|e| Error::SendDebugStatus(Box::new(e)))
}
1763
Zach Reizner2c770e62020-09-30 16:49:59 -07001764fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001765 cpu_id: usize,
1766 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001767 vm: impl VmArch + 'static,
1768 mut irq_chip: impl IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001769 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001770 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001771 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001772 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001773 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001774 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001775 io_bus: devices::Bus,
1776 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07001777 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001778 requires_pvclock_ctrl: bool,
Dylan Reid3d637062019-05-19 15:06:26 -07001779 from_main_channel: mpsc::Receiver<VcpuControl>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001780 use_hypervisor_signals: bool,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001781 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] to_gdb_channel: Option<
1782 mpsc::Sender<VcpuDebugStatusMessage>,
1783 >,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001784) -> Result<JoinHandle<()>>
1785where
Zach Reizner2c770e62020-09-30 16:49:59 -07001786 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001787{
Zach Reizner8fb52112017-12-13 16:04:39 -08001788 thread::Builder::new()
1789 .name(format!("crosvm_vcpu{}", cpu_id))
1790 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08001791 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
1792 // implementation accomplishes that.
1793 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
1794
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001795 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1796 let guest_mem = vm.get_memory().clone();
Zach Reizner2c770e62020-09-30 16:49:59 -07001797 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001798 cpu_id,
1799 vcpu,
1800 vm,
1801 &mut irq_chip,
1802 vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001803 run_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001804 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001805 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001806 has_bios,
1807 use_hypervisor_signals,
1808 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001809
Zach Reizner8fb52112017-12-13 16:04:39 -08001810 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001811
Zach Reizner2c770e62020-09-30 16:49:59 -07001812 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001813 Ok(v) => v,
1814 Err(e) => {
1815 error!("failed to start vcpu {}: {}", cpu_id, e);
1816 return;
1817 }
1818 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001819
Dylan Reidb0492662019-05-17 14:50:13 -07001820 let mut run_mode = VmRunMode::Running;
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001821 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1822 if to_gdb_channel.is_some() {
1823 // Wait until a GDB client attaches
1824 run_mode = VmRunMode::Breakpoint;
1825 }
1826
Dylan Reidb0492662019-05-17 14:50:13 -07001827 let mut interrupted_by_signal = false;
1828
1829 'vcpu_loop: loop {
1830 // Start by checking for messages to process and the run state of the CPU.
1831 // An extra check here for Running so there isn't a need to call recv unless a
1832 // message is likely to be ready because a signal was sent.
1833 if interrupted_by_signal || run_mode != VmRunMode::Running {
1834 'state_loop: loop {
1835 // Tries to get a pending message without blocking first.
1836 let msg = match from_main_channel.try_recv() {
1837 Ok(m) => m,
1838 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
1839 // If the VM is running and no message is pending, the state won't
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001840 // change.
Dylan Reidb0492662019-05-17 14:50:13 -07001841 break 'state_loop;
1842 }
1843 Err(mpsc::TryRecvError::Empty) => {
1844 // If the VM is not running, wait until a message is ready.
1845 match from_main_channel.recv() {
1846 Ok(m) => m,
1847 Err(mpsc::RecvError) => {
1848 error!("Failed to read from main channel in vcpu");
1849 break 'vcpu_loop;
1850 }
1851 }
1852 }
1853 Err(mpsc::TryRecvError::Disconnected) => {
1854 error!("Failed to read from main channel in vcpu");
1855 break 'vcpu_loop;
1856 }
1857 };
1858
1859 // Collect all pending messages.
1860 let mut messages = vec![msg];
1861 messages.append(&mut from_main_channel.try_iter().collect());
1862
Keiichi Watanabec5262e92020-10-21 15:57:33 +09001863 for msg in messages {
1864 match msg {
1865 VcpuControl::RunState(new_mode) => {
1866 run_mode = new_mode;
1867 match run_mode {
1868 VmRunMode::Running => break 'state_loop,
1869 VmRunMode::Suspending => {
1870 // On KVM implementations that use a paravirtualized
1871 // clock (e.g. x86), a flag must be set to indicate to
1872 // the guest kernel that a vCPU was suspended. The guest
1873 // kernel will use this flag to prevent the soft lockup
1874 // detection from triggering when this vCPU resumes,
1875 // which could happen days later in realtime.
1876 if requires_pvclock_ctrl {
1877 if let Err(e) = vcpu.pvclock_ctrl() {
1878 error!(
1879 "failed to tell hypervisor vcpu {} is suspending: {}",
1880 cpu_id, e
1881 );
1882 }
1883 }
1884 }
1885 VmRunMode::Breakpoint => {}
1886 VmRunMode::Exiting => break 'vcpu_loop,
1887 }
1888 }
1889 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1890 VcpuControl::Debug(d) => {
1891 match &to_gdb_channel {
1892 Some(ref ch) => {
1893 if let Err(e) = handle_debug_msg(
1894 cpu_id, &vcpu, &guest_mem, d, &ch,
1895 ) {
1896 error!("Failed to handle gdb message: {}", e);
1897 }
1898 },
1899 None => {
1900 error!("VcpuControl::Debug received while GDB feature is disabled: {:?}", d);
Dylan Reidb0492662019-05-17 14:50:13 -07001901 }
1902 }
1903 }
Dylan Reidb0492662019-05-17 14:50:13 -07001904 }
1905 }
1906 }
1907 }
1908
1909 interrupted_by_signal = false;
1910
Steven Richman11dc6712020-09-02 15:39:14 -07001911 // Vcpus may have run a HLT instruction, which puts them into a state other than
1912 // VcpuRunState::Runnable. In that case, this call to wait_until_runnable blocks
1913 // until either the irqchip receives an interrupt for this vcpu, or until the main
1914 // thread kicks this vcpu as a result of some VmControl operation. In most IrqChip
1915 // implementations HLT instructions do not make it to crosvm, and thus this is a
1916 // no-op that always returns VcpuRunState::Runnable.
1917 match irq_chip.wait_until_runnable(&vcpu) {
1918 Ok(VcpuRunState::Runnable) => {}
1919 Ok(VcpuRunState::Interrupted) => interrupted_by_signal = true,
1920 Err(e) => error!(
1921 "error waiting for vcpu {} to become runnable: {}",
1922 cpu_id, e
1923 ),
1924 }
1925
1926 if !interrupted_by_signal {
1927 match vcpu.run(&vcpu_run_handle) {
1928 Ok(VcpuExit::IoIn { port, mut size }) => {
1929 let mut data = [0; 8];
1930 if size > data.len() {
1931 error!("unsupported IoIn size of {} bytes", size);
1932 size = data.len();
Keiichi Watanabe23f94712020-10-22 17:43:06 +09001933 }
Steven Richman11dc6712020-09-02 15:39:14 -07001934 io_bus.read(port as u64, &mut data[..size]);
1935 if let Err(e) = vcpu.set_data(&data[..size]) {
1936 error!("failed to set return data for IoIn: {}", e);
1937 }
Keiichi Watanabe23f94712020-10-22 17:43:06 +09001938 }
Steven Richman11dc6712020-09-02 15:39:14 -07001939 Ok(VcpuExit::IoOut {
1940 port,
1941 mut size,
1942 data,
1943 }) => {
1944 if size > data.len() {
1945 error!("unsupported IoOut size of {} bytes", size);
1946 size = data.len();
1947 }
1948 io_bus.write(port as u64, &data[..size]);
1949 }
1950 Ok(VcpuExit::MmioRead { address, size }) => {
1951 let mut data = [0; 8];
1952 mmio_bus.read(address, &mut data[..size]);
1953 // Setting data for mmio can not fail.
1954 let _ = vcpu.set_data(&data[..size]);
1955 }
1956 Ok(VcpuExit::MmioWrite {
1957 address,
1958 size,
1959 data,
1960 }) => {
1961 mmio_bus.write(address, &data[..size]);
1962 }
1963 Ok(VcpuExit::IoapicEoi { vector }) => {
1964 if let Err(e) = irq_chip.broadcast_eoi(vector) {
1965 error!(
1966 "failed to broadcast eoi {} on vcpu {}: {}",
1967 vector, cpu_id, e
1968 );
1969 }
1970 }
1971 Ok(VcpuExit::IrqWindowOpen) => {}
1972 Ok(VcpuExit::Hlt) => irq_chip.halted(cpu_id),
1973 Ok(VcpuExit::Shutdown) => break,
1974 Ok(VcpuExit::FailEntry {
1975 hardware_entry_failure_reason,
1976 }) => {
1977 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
Steven Richmanf32d0b42020-06-20 21:45:32 -07001978 break;
1979 }
Steven Richman11dc6712020-09-02 15:39:14 -07001980 Ok(VcpuExit::SystemEvent(_, _)) => break,
1981 Ok(VcpuExit::Debug { .. }) => {
1982 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
1983 {
1984 let msg = VcpuDebugStatusMessage {
1985 cpu: cpu_id as usize,
1986 msg: VcpuDebugStatus::HitBreakPoint,
1987 };
1988 if let Some(ref ch) = to_gdb_channel {
1989 if let Err(e) = ch.send(msg) {
1990 error!("failed to notify breakpoint to GDB thread: {}", e);
1991 break;
1992 }
1993 }
1994 run_mode = VmRunMode::Breakpoint;
1995 }
1996 }
1997 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1998 Err(e) => match e.errno() {
1999 libc::EINTR => interrupted_by_signal = true,
2000 libc::EAGAIN => {}
2001 _ => {
2002 error!("vcpu hit unknown error: {}", e);
2003 break;
2004 }
2005 },
2006 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002007 }
2008
2009 if interrupted_by_signal {
2010 if use_hypervisor_signals {
2011 // Try to clear the signal that we use to kick VCPU if it is pending before
2012 // attempting to handle pause requests.
2013 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
2014 error!("failed to clear pending signal: {}", e);
2015 break;
2016 }
2017 } else {
2018 vcpu.set_immediate_exit(false);
2019 }
David Tolnay8f3a2322018-11-30 17:11:35 -08002020 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002021
Steven Richman11dc6712020-09-02 15:39:14 -07002022 if let Err(e) = irq_chip.inject_interrupts(&vcpu) {
2023 error!("failed to inject interrupts for vcpu {}: {}", cpu_id, e);
2024 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002025 }
David Tolnay2bac1e72018-12-12 14:33:42 -08002026 })
2027 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08002028}
2029
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of i64s.
// Returns an error if the file cannot be opened or read, if its contents are not valid UTF-8, or
// if any of the fields fail to parse. A file with no fields yields an empty Vec.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    let mut file = File::open(path)?;

    // Read the whole file instead of a single fixed-size chunk: a short buffer can cut a field
    // in half, and the truncated digits would silently parse as a different number.
    let mut buf = Vec::new();
    file.read_to_end(&mut buf)?;

    let content =
        str::from_utf8(&buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
    // `split_whitespace` already skips leading and trailing whitespace, so no `trim` is needed.
    content
        .split_whitespace()
        .map(|x| {
            x.parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
2049
2050// Reads the contents of a file and converts them into a u64, and if there
2051// are multiple fields it only returns the first one.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002052fn file_to_i64<P: AsRef<Path>>(path: P) -> io::Result<i64> {
2053 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07002054 .into_iter()
2055 .next()
2056 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002057}
2058
Steven Richmanf32d0b42020-06-20 21:45:32 -07002059fn create_kvm(mem: GuestMemory) -> base::Result<KvmVm> {
2060 let kvm = Kvm::new()?;
2061 let vm = KvmVm::new(&kvm, mem)?;
2062 Ok(vm)
2063}
2064
2065fn create_kvm_kernel_irq_chip(
2066 vm: &KvmVm,
2067 vcpu_count: usize,
2068 _ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07002069) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002070 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
2071 Ok(irq_chip)
2072}
2073
2074#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2075fn create_kvm_split_irq_chip(
2076 vm: &KvmVm,
2077 vcpu_count: usize,
2078 ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07002079) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002080 let irq_chip = KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_socket)?;
2081 Ok(irq_chip)
2082}
2083
Dylan Reid059a1882018-07-23 17:58:09 -07002084pub fn run_config(cfg: Config) -> Result<()> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002085 if cfg.split_irqchip {
2086 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
2087 {
2088 unimplemented!("KVM split irqchip mode only supported on x86 processors")
2089 }
2090
2091 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
2092 {
Zach Reizner304e7312020-09-29 16:00:24 -07002093 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm, create_kvm_split_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07002094 }
2095 } else {
Zach Reizner304e7312020-09-29 16:00:24 -07002096 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm, create_kvm_kernel_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07002097 }
2098}
2099
Zach Reizner304e7312020-09-29 16:00:24 -07002100fn run_vm<V, Vcpu, I, FV, FI>(cfg: Config, create_vm: FV, create_irq_chip: FI) -> Result<()>
Steven Richmanf32d0b42020-06-20 21:45:32 -07002101where
2102 V: VmArch + 'static,
Zach Reizner304e7312020-09-29 16:00:24 -07002103 Vcpu: VcpuArch + 'static,
2104 I: IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002105 FV: FnOnce(GuestMemory) -> base::Result<V>,
2106 FI: FnOnce(
2107 &V,
2108 usize, // vcpu_count
2109 VmIrqRequestSocket, // ioapic_device_socket
2110 ) -> base::Result<I>,
2111{
Lepton Wu9105e9f2019-03-14 11:38:31 -07002112 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07002113 // Printing something to the syslog before entering minijail so that libc's syslogger has a
2114 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
2115 // access to those files will not be possible.
2116 info!("crosvm entering multiprocess mode");
2117 }
2118
Jingkui Wang100e6e42019-03-08 20:41:57 -08002119 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07002120 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002121 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
2122 // before any jailed devices have been spawned, so that we can catch any of them that fail very
2123 // quickly.
2124 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
2125
David Tolnay2b089fc2019-03-04 15:33:22 -08002126 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
2127 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002128 } else {
2129 None
2130 };
2131
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002132 let vm_image = match cfg.executable_path {
2133 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
2134 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
2135 ),
2136 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
2137 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
2138 ),
2139 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2140 };
2141
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002142 let mut control_sockets = Vec::new();
2143 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2144 let gdb_socket = if let Some(port) = cfg.gdb {
2145 // GDB needs a control socket to interrupt vcpus.
2146 let (gdb_host_socket, gdb_control_socket) =
2147 msg_socket::pair::<VmResponse, VmRequest>().map_err(Error::CreateSocket)?;
2148 control_sockets.push(TaggedControlSocket::Vm(gdb_host_socket));
2149 Some((port, gdb_control_socket))
2150 } else {
2151 None
2152 };
2153
Dylan Reid059a1882018-07-23 17:58:09 -07002154 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002155 memory_size: cfg
2156 .memory
2157 .unwrap_or(256)
2158 .checked_mul(1024 * 1024)
2159 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07002160 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002161 vcpu_affinity: cfg.vcpu_affinity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002162 no_smt: cfg.no_smt,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002163 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002164 android_fstab: cfg
2165 .android_fstab
2166 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002167 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002168 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002169 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002170 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002171 extra_kernel_params: cfg.params.clone(),
2172 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07002173 acpi_sdts: cfg
2174 .acpi_tables
2175 .iter()
2176 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2177 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002178 rt_cpus: cfg.rt_cpus.clone(),
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002179 protected_vm: cfg.protected_vm,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002180 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2181 gdb: gdb_socket,
Dylan Reid059a1882018-07-23 17:58:09 -07002182 };
2183
Zach Reiznera60744b2019-02-13 17:33:32 -08002184 let control_server_socket = match &cfg.socket_path {
2185 Some(path) => Some(UnlinkUnixSeqpacketListener(
2186 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
2187 )),
2188 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002189 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002190
Zach Reizner55a9e502018-10-03 10:22:32 -07002191 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07002192 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2193 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07002194 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07002195 let (balloon_host_socket, balloon_device_socket) =
Charles William Dick664cc3c2020-01-10 14:31:52 +09002196 msg_socket::pair::<BalloonControlCommand, BalloonControlResult>()
2197 .map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002198
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002199 // Create one control socket per disk.
2200 let mut disk_device_sockets = Vec::new();
2201 let mut disk_host_sockets = Vec::new();
2202 let disk_count = cfg.disks.len();
2203 for _ in 0..disk_count {
2204 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07002205 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
2206 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002207 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07002208 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002209 }
2210
Daniel Verkampe1980a92020-02-07 11:00:55 -08002211 let mut pmem_device_sockets = Vec::new();
2212 let pmem_count = cfg.pmem_devices.len();
2213 for _ in 0..pmem_count {
2214 let (pmem_host_socket, pmem_device_socket) =
2215 msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
2216 pmem_device_sockets.push(pmem_device_socket);
2217 control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
2218 }
2219
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002220 let (gpu_host_socket, gpu_device_socket) =
2221 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2222 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
2223
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002224 let (ioapic_host_socket, ioapic_device_socket) =
2225 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
2226 control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket));
2227
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002228 let battery = if cfg.battery_type.is_some() {
Alex Lauf408c732020-11-10 18:24:04 +09002229 let jail = match simple_jail(&cfg, "battery")? {
2230 #[cfg_attr(not(feature = "powerd-monitor-powerd"), allow(unused_mut))]
2231 Some(mut jail) => {
2232 // Setup a bind mount to the system D-Bus socket if the powerd monitor is used.
2233 #[cfg(feature = "power-monitor-powerd")]
2234 {
2235 add_crosvm_user_to_jail(&mut jail, "battery")?;
2236
2237 // Create a tmpfs in the device's root directory so that we can bind mount files.
2238 jail.mount_with_data(
2239 Path::new("none"),
2240 Path::new("/"),
2241 "tmpfs",
2242 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
2243 "size=67108864",
2244 )?;
2245
2246 let system_bus_socket_path = Path::new("/run/dbus/system_bus_socket");
2247 jail.mount_bind(system_bus_socket_path, system_bus_socket_path, true)?;
2248 }
2249 Some(jail)
2250 }
2251 None => None,
2252 };
2253 (&cfg.battery_type, jail)
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002254 } else {
2255 (&cfg.battery_type, None)
2256 };
2257
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002258 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2259
Zach Reizner304e7312020-09-29 16:00:24 -07002260 let linux: RunnableLinuxVm<_, Vcpu, _> = Arch::build_vm(
Trent Begin17ccaad2019-04-17 13:51:25 -06002261 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06002262 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002263 simple_jail(&cfg, "serial")?,
Chuanxiao Dongbbb32af2020-04-27 16:37:10 +08002264 battery,
Jakub Starona3411ea2019-04-24 10:55:25 -07002265 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06002266 create_devices(
2267 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07002268 mem,
2269 vm,
2270 sys_allocator,
2271 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07002272 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002273 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002274 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06002275 balloon_device_socket,
2276 &mut disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08002277 &mut pmem_device_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002278 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002279 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06002280 )
2281 },
Steven Richmanf32d0b42020-06-20 21:45:32 -07002282 create_vm,
2283 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_socket),
Trent Begin17ccaad2019-04-17 13:51:25 -06002284 )
David Tolnaybe034262019-03-04 17:48:36 -08002285 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002286
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002287 run_control(
2288 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08002289 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002290 control_sockets,
2291 balloon_host_socket,
2292 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08002293 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002294 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002295 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002296 Arc::clone(&map_request),
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002297 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002298}
2299
Steven Richman11dc6712020-09-02 15:39:14 -07002300/// Signals all running VCPUs to vmexit, sends VmRunMode message to each VCPU channel, and tells
2301/// `irq_chip` to stop blocking halted VCPUs. The channel message is set first because both the
2302/// signal and the irq_chip kick could cause the VCPU thread to continue through the VCPU run
2303/// loop.
2304fn kick_all_vcpus(
2305 vcpu_handles: &[(JoinHandle<()>, mpsc::Sender<vm_control::VcpuControl>)],
2306 irq_chip: &impl IrqChip,
2307 run_mode: &VmRunMode,
2308) {
2309 for (handle, channel) in vcpu_handles {
2310 if let Err(e) = channel.send(VcpuControl::RunState(run_mode.clone())) {
2311 error!("failed to send VmRunMode: {}", e);
2312 }
2313 let _ = handle.kill(SIGRTMIN() + 0);
2314 }
2315 irq_chip.kick_halted_vcpus();
2316}
2317
Zach Reizner304e7312020-09-29 16:00:24 -07002318fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static, I: IrqChipArch + 'static>(
2319 mut linux: RunnableLinuxVm<V, Vcpu, I>,
Zach Reiznera60744b2019-02-13 17:33:32 -08002320 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002321 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07002322 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07002323 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08002324 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07002325 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002326 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002327 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reizner55a9e502018-10-03 10:22:32 -07002328) -> Result<()> {
David Tolnay5bbbf612018-12-01 17:49:30 -08002329 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002330
Zach Reizner5bed0d22018-03-28 02:31:11 -07002331 #[derive(PollToken)]
2332 enum Token {
2333 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002334 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002335 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002336 IrqFd { index: IrqEventIndex },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002337 BalanceMemory,
2338 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002339 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002340 VmControl { index: usize },
2341 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002342
Zach Reizner19ad1f32019-12-12 18:58:50 -08002343 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002344 .set_raw_mode()
2345 .expect("failed to set terminal raw mode");
2346
Michael Hoylee392c462020-10-07 03:29:24 -07002347 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerb2110be2019-07-23 15:55:03 -07002348 (&linux.exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002349 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002350 (&sigchld_fd, Token::ChildSignal),
2351 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002352 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002353
Zach Reiznera60744b2019-02-13 17:33:32 -08002354 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002355 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002356 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002357 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002358 }
Dylan Reid059a1882018-07-23 17:58:09 -07002359 for (index, socket) in control_sockets.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002360 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002361 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002362 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002363 }
2364
Steven Richmanf32d0b42020-06-20 21:45:32 -07002365 let events = linux
2366 .irq_chip
2367 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002368 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002369
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002370 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002371 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002372 .add(&evt, Token::IrqFd { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002373 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002374 }
2375
Charles William Dick0bf8a552019-10-29 15:36:01 +09002376 // Balance available memory between guest and host every second.
Michael Hoyle08d86a42020-08-19 14:45:21 -07002377 let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002378 if Path::new(LOWMEM_AVAILABLE).exists() {
2379 // Create timer request balloon stats every 1s.
Michael Hoylee392c462020-10-07 03:29:24 -07002380 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002381 .add(&balancemem_timer, Token::BalanceMemory)
Michael Hoylee392c462020-10-07 03:29:24 -07002382 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002383 let balancemem_dur = Duration::from_secs(1);
2384 let balancemem_int = Duration::from_secs(1);
2385 balancemem_timer
2386 .reset(balancemem_dur, Some(balancemem_int))
Michael Hoyle08d86a42020-08-19 14:45:21 -07002387 .map_err(Error::ResetTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002388
2389 // Listen for balloon statistics from the guest so we can balance.
Michael Hoylee392c462020-10-07 03:29:24 -07002390 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002391 .add(&balloon_host_socket, Token::BalloonResult)
Michael Hoylee392c462020-10-07 03:29:24 -07002392 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002393 } else {
2394 warn!("Unable to open low mem available, maybe not a chrome os kernel");
2395 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002396
Lepton Wu20333e42019-03-14 10:48:03 -07002397 if sandbox {
2398 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2399 drop_capabilities().map_err(Error::DropCapabilities)?;
2400 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002401
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002402 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2403 // Create a channel for GDB thread.
2404 let (to_gdb_channel, from_vcpu_channel) = if linux.gdb.is_some() {
2405 let (s, r) = mpsc::channel();
2406 (Some(s), Some(r))
2407 } else {
2408 (None, None)
2409 };
2410
Steven Richmanf32d0b42020-06-20 21:45:32 -07002411 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2412 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002413 let use_hypervisor_signals = !linux
2414 .vm
2415 .get_hypervisor()
2416 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002417 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002418
Zach Reizner304e7312020-09-29 16:00:24 -07002419 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002420 Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
2421 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2422 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002423 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07002424 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07002425 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2426 Some(VcpuAffinity::Global(v)) => v,
2427 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2428 None => Default::default(),
2429 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002430 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002431 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002432 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002433 linux.vm.try_clone().map_err(Error::CloneEvent)?,
2434 linux.irq_chip.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002435 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002436 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002437 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002438 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002439 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002440 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002441 linux.io_bus.clone(),
2442 linux.mmio_bus.clone(),
Michael Hoyle685316f2020-09-16 15:29:20 -07002443 linux.exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002444 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07002445 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002446 use_hypervisor_signals,
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002447 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2448 to_gdb_channel.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07002449 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07002450 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07002451 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002452
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002453 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2454 // Spawn GDB thread.
2455 if let Some((gdb_port_num, gdb_control_socket)) = linux.gdb.take() {
2456 let to_vcpu_channels = vcpu_handles
2457 .iter()
2458 .map(|(_handle, channel)| channel.clone())
2459 .collect();
2460 let target = GdbStub::new(
2461 gdb_control_socket,
2462 to_vcpu_channels,
2463 from_vcpu_channel.unwrap(), // Must succeed to unwrap()
2464 );
2465 thread::Builder::new()
2466 .name("gdb".to_owned())
2467 .spawn(move || gdb_thread(target, gdb_port_num))
2468 .map_err(Error::SpawnGdbServer)?;
2469 };
2470
Dylan Reid059a1882018-07-23 17:58:09 -07002471 vcpu_thread_barrier.wait();
2472
Michael Hoylee392c462020-10-07 03:29:24 -07002473 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002474 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07002475 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002476 Ok(v) => v,
2477 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002478 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002479 break;
2480 }
2481 }
2482 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002483
Steven Richmanf32d0b42020-06-20 21:45:32 -07002484 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2485 warn!("can't deliver delayed irqs: {}", e);
2486 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002487
Zach Reiznera60744b2019-02-13 17:33:32 -08002488 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07002489 for event in events.iter().filter(|e| e.is_readable) {
2490 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002491 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002492 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07002493 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002494 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002495 Token::Suspend => {
2496 info!("VM requested suspend");
2497 linux.suspend_evt.read().unwrap();
Steven Richman11dc6712020-09-02 15:39:14 -07002498 kick_all_vcpus(&vcpu_handles, &linux.irq_chip, &VmRunMode::Suspending);
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002499 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002500 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002501 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002502 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002503 let pid = siginfo.ssi_pid;
2504 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2505 Some(label) => format!("{} (pid {})", label, pid),
2506 None => format!("pid {}", pid),
2507 };
David Tolnayf5032762018-12-03 10:46:45 -08002508 error!(
2509 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002510 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002511 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002512 }
Michael Hoylee392c462020-10-07 03:29:24 -07002513 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002514 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002515 Token::IrqFd { index } => {
2516 if let Err(e) = linux.irq_chip.service_irq_event(index) {
2517 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002518 }
2519 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002520 Token::BalanceMemory => {
Michael Hoyle08d86a42020-08-19 14:45:21 -07002521 balancemem_timer.wait().map_err(Error::Timer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002522 let command = BalloonControlCommand::Stats {};
2523 if let Err(e) = balloon_host_socket.send(&command) {
2524 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002525 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002526 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002527 Token::BalloonResult => {
2528 match balloon_host_socket.recv() {
2529 Ok(BalloonControlResult::Stats {
2530 stats,
2531 balloon_actual: balloon_actual_u,
2532 }) => {
2533 // Available memory is reported in MB, and we need bytes.
2534 let host_available = file_to_i64(LOWMEM_AVAILABLE)
2535 .map_err(Error::ReadMemAvailable)?
2536 << 20;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002537 let guest_free_u = if let Some(free) = stats.free_memory {
2538 free
Charles William Dick0bf8a552019-10-29 15:36:01 +09002539 } else {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002540 warn!("guest free_memory stat is missing");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002541 continue;
2542 };
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002543 let guest_cached_u = if let Some(cached) = stats.disk_caches {
2544 cached
2545 } else {
2546 warn!("guest disk_caches stat is missing");
2547 continue;
2548 };
2549 if guest_free_u > i64::max_value() as u64 {
2550 warn!("guest free memory is too large");
2551 continue;
2552 }
2553 if guest_cached_u > i64::max_value() as u64 {
2554 warn!("guest cached memory is too large");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002555 continue;
2556 }
2557 if balloon_actual_u > i64::max_value() as u64 {
2558 warn!("actual balloon size is too large");
2559 continue;
2560 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002561 // Tell the guest to change the balloon size if the target balloon size
2562 // is more than 5% different from the current balloon size.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002563 const RESIZE_PERCENT: i64 = 5;
2564 let balloon_actual = balloon_actual_u as i64;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002565 let guest_free = guest_free_u as i64;
2566 let guest_cached = guest_cached_u as i64;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002567 // Compute how much memory the guest should have available after we
2568 // rebalance.
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002569 let guest_available_target = host_available;
2570 let guest_available_delta =
2571 guest_available_target - guest_free - guest_cached;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002572 // How much do we have to change the balloon to balance.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002573 let balloon_target = max(balloon_actual - guest_available_delta, 0);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002574 // Compute the change in balloon size in percent. If the balloon size
2575 // is 0, use 1 so we don't overflow from the infinity % increase.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002576 let balloon_change_percent = (balloon_actual - balloon_target).abs()
2577 * 100
2578 / max(balloon_actual, 1);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002579
Charles William Dick0bf8a552019-10-29 15:36:01 +09002580 if balloon_change_percent >= RESIZE_PERCENT {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002581 info!("resizing balloon: host avail {}, guest free {} cached {} (target {}), balloon actual {} (target {})",
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002582 host_available,
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002583 guest_free,
2584 guest_cached,
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002585 guest_available_target,
2586 balloon_actual,
2587 balloon_target,
2588 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09002589 let command = BalloonControlCommand::Adjust {
2590 num_bytes: balloon_target as u64,
2591 };
2592 if let Err(e) = balloon_host_socket.send(&command) {
2593 warn!("failed to send memory value to balloon device: {}", e);
2594 }
2595 }
2596 }
2597 Err(e) => {
2598 error!("failed to recv BalloonControlResult: {}", e);
2599 }
2600 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002601 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002602 Token::VmControlServer => {
2603 if let Some(socket_server) = &control_server_socket {
2604 match socket_server.accept() {
2605 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07002606 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002607 .add(
2608 &socket,
2609 Token::VmControl {
2610 index: control_sockets.len(),
2611 },
2612 )
Michael Hoylee392c462020-10-07 03:29:24 -07002613 .map_err(Error::WaitContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002614 control_sockets
2615 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002616 }
2617 Err(e) => error!("failed to accept socket: {}", e),
2618 }
2619 }
2620 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002621 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08002622 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002623 match socket {
2624 TaggedControlSocket::Vm(socket) => match socket.recv() {
2625 Ok(request) => {
2626 let mut run_mode_opt = None;
2627 let response = request.execute(
2628 &mut run_mode_opt,
2629 &balloon_host_socket,
2630 disk_host_sockets,
2631 &usb_control_socket,
Chuanxiao Dong256be3a2020-04-27 16:39:33 +08002632 &mut linux.bat_control,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002633 );
2634 if let Err(e) = socket.send(&response) {
2635 error!("failed to send VmResponse: {}", e);
2636 }
2637 if let Some(run_mode) = run_mode_opt {
2638 info!("control socket changed run mode to {}", run_mode);
2639 match run_mode {
2640 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07002641 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002642 }
Keiichi Watanabec5262e92020-10-21 15:57:33 +09002643 other => {
Chuanxiao Dong2bbe85c2020-11-12 17:18:07 +08002644 if other == VmRunMode::Running {
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002645 linux.io_bus.notify_resume();
2646 }
Steven Richman11dc6712020-09-02 15:39:14 -07002647 kick_all_vcpus(
2648 &vcpu_handles,
2649 &linux.irq_chip,
2650 &other,
2651 );
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002652 }
2653 }
2654 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002655 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002656 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002657 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002658 vm_control_indices_to_remove.push(index);
2659 } else {
2660 error!("failed to recv VmRequest: {}", e);
2661 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002662 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002663 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07002664 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002665 Ok(request) => {
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002666 let response = request.execute(
2667 &mut linux.vm,
2668 &mut linux.resources,
2669 Arc::clone(&map_request),
2670 );
Jakub Starond99cd0a2019-04-11 14:09:39 -07002671 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002672 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002673 }
2674 }
2675 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002676 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002677 vm_control_indices_to_remove.push(index);
2678 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002679 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002680 }
2681 }
2682 },
Xiong Zhang2515b752019-09-19 10:29:02 +08002683 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
2684 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002685 let response = {
2686 let irq_chip = &mut linux.irq_chip;
2687 request.execute(
2688 |setup| match setup {
2689 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002690 if let Some(event_index) = irq_chip
2691 .register_irq_event(irq, ev, None)?
2692 {
2693 match wait_ctx.add(
2694 ev,
2695 Token::IrqFd {
2696 index: event_index
2697 },
2698 ) {
2699 Err(e) => {
2700 warn!("failed to add IrqFd to poll context: {}", e);
2701 Err(e)
2702 },
2703 Ok(_) => {
2704 Ok(())
2705 }
2706 }
2707 } else {
2708 Ok(())
2709 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002710 }
2711 IrqSetup::Route(route) => irq_chip.route_irq(route),
2712 },
2713 &mut linux.resources,
2714 )
2715 };
Xiong Zhang2515b752019-09-19 10:29:02 +08002716 if let Err(e) = socket.send(&response) {
2717 error!("failed to send VmIrqResponse: {}", e);
2718 }
2719 }
2720 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002721 if let MsgError::RecvZero = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002722 vm_control_indices_to_remove.push(index);
2723 } else {
2724 error!("failed to recv VmIrqRequest: {}", e);
2725 }
2726 }
2727 },
Daniel Verkampe1980a92020-02-07 11:00:55 -08002728 TaggedControlSocket::VmMsync(socket) => match socket.recv() {
2729 Ok(request) => {
2730 let response = request.execute(&mut linux.vm);
2731 if let Err(e) = socket.send(&response) {
2732 error!("failed to send VmMsyncResponse: {}", e);
2733 }
2734 }
2735 Err(e) => {
2736 if let MsgError::BadRecvSize { actual: 0, .. } = e {
2737 vm_control_indices_to_remove.push(index);
2738 } else {
2739 error!("failed to recv VmMsyncRequest: {}", e);
2740 }
2741 }
2742 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002743 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002744 }
2745 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002746 }
2747 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002748
Michael Hoylee392c462020-10-07 03:29:24 -07002749 for event in events.iter().filter(|e| e.is_hungup) {
2750 match event.token {
Zach Reiznera60744b2019-02-13 17:33:32 -08002751 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002752 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002753 Token::ChildSignal => {}
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002754 Token::IrqFd { index: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09002755 Token::BalanceMemory => {}
2756 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002757 Token::VmControlServer => {}
2758 Token::VmControl { index } => {
2759 // It's possible more data is readable and buffered while the socket is hungup,
2760 // so don't delete the socket from the poll context until we're sure all the
2761 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07002762 match control_sockets
2763 .get(index)
2764 .map(|s| s.as_ref().get_readable_bytes())
2765 {
Zach Reiznera60744b2019-02-13 17:33:32 -08002766 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
2767 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
2768 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07002769 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002770 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002771 }
2772 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002773
2774 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002775 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002776 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002777 vm_control_indices_to_remove.dedup();
2778 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07002779 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2780 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002781 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07002782 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08002783 // that has already been closed. Because the token associated with that spurious event
2784 // now belongs to a different socket, the control loop will start to interact with
2785 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2786 // blocking on a socket that will never be ready. See also: crbug.com/1019986
2787 if let Some(socket) = control_sockets.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002788 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08002789 }
2790
2791 // This line implicitly drops the socket at `index` when it gets returned by
2792 // `swap_remove`. After this line, the socket at `index` is not the one from
2793 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07002794 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08002795 control_sockets.swap_remove(index);
2796 if let Some(socket) = control_sockets.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002797 wait_ctx
2798 .modify(socket, EventType::Read, Token::VmControl { index })
2799 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002800 }
2801 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002802 }
2803
Steven Richman11dc6712020-09-02 15:39:14 -07002804 kick_all_vcpus(&vcpu_handles, &linux.irq_chip, &VmRunMode::Exiting);
2805 for (handle, _) in vcpu_handles {
2806 if let Err(e) = handle.join() {
2807 error!("failed to join vcpu thread: {:?}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002808 }
2809 }
2810
Daniel Verkamp94c35272019-09-12 13:31:30 -07002811 // Explicitly drop the VM structure here to allow the devices to clean up before the
2812 // control sockets are closed when this function exits.
2813 mem::drop(linux);
2814
Zach Reizner19ad1f32019-12-12 18:58:50 -08002815 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002816 .set_canon_mode()
2817 .expect("failed to restore canonical mode for terminal");
2818
2819 Ok(())
2820}