blob: 2261b99a153a2079cba9d90a4914763aa6866af6 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07005use std::cmp::{max, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Jakub Starond99cd0a2019-04-11 14:09:39 -070020use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070025use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080026use std::thread;
27use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090028use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080029
David Tolnay41a6f842019-03-01 16:18:44 -080030use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080031
Tomasz Jeznach42644642020-05-20 23:27:59 -070032use acpi_tables::sdt::SDT;
33
Michael Hoyle6b196952020-08-02 20:09:41 -070034use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reizner65b98f12019-11-22 17:34:58 -080035#[cfg(feature = "gpu")]
36use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070037use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080038#[cfg(feature = "audio")]
39use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080040use devices::{
Steven Richmanf32d0b42020-06-20 21:45:32 -070041 self, HostBackendDeviceProvider, KvmKernelIrqChip, PciDevice, VfioContainer, VfioDevice,
42 VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080043};
Steven Richmanf32d0b42020-06-20 21:45:32 -070044use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner304e7312020-09-29 16:00:24 -070045use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070046use minijail::{self, Minijail};
Zach Reiznera60744b2019-02-13 17:33:32 -080047use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080048use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070049use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080050use resources::{Alloc, MmioType, SystemAllocator};
Zach Reizner6a8fdd92019-01-16 14:38:41 -080051use sync::{Condvar, Mutex};
Jakub Starona3411ea2019-04-24 10:55:25 -070052
Michael Hoyle6b196952020-08-02 20:09:41 -070053use base::{
David Tolnay633426a2019-04-12 12:18:35 -070054 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060055 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
Michael Hoylee392c462020-10-07 03:29:24 -070056 set_cpu_affinity, set_rt_prio_limit, set_rt_round_robin, signal, validate_raw_fd, warn,
57 AsRawDescriptor, Event, EventType, ExternalMapping, FlockOperation, Killable,
58 MemoryMappingArena, PollToken, Protection, RawDescriptor, ScopedEvent, SignalFd, Terminal,
59 Timer, WaitContext, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080060};
Jakub Starone7c59052019-04-09 12:31:14 -070061use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070062 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Charles William Dick664cc3c2020-01-10 14:31:52 +090063 BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket,
Steven Richmanf32d0b42020-06-20 21:45:32 -070064 DiskControlResult, IrqSetup, UsbControlSocket, VmControlResponseSocket, VmIrqRequest,
65 VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket,
66 VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, VmMsyncRequest,
67 VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070068};
Dylan Reidec058d62020-07-20 20:21:11 -070069use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080070
Daniel Verkamp50740ce2020-02-28 12:36:56 -080071use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070072use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070073 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
74 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070075};
Sonny Raoed517d12018-02-13 22:09:43 -080076
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080077#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070078use {
79 aarch64::AArch64 as Arch,
80 devices::{IrqChip, IrqChipAArch64 as IrqChipArch},
81 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
82};
Zach Reizner55a9e502018-10-03 10:22:32 -070083#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070084use {
85 devices::{IrqChipX86_64, IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
86 hypervisor::{VcpuX86_64, VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
87 x86_64::X8664arch as Arch,
88};
Zach Reizner39aa26b2017-12-12 18:03:23 -080089
David Tolnay3df35522019-03-11 12:36:30 -070090#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070091#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080092pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070093 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070094 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070095 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -080096 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070097 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080098 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -070099 BlockDeviceNew(base::Error),
100 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -0800101 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700102 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700103 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700104 CloneVcpu(base::Error),
105 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000106 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800107 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700108 CreateConsole(arch::serial::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700109 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700110 CreateEvent(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700111 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800112 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800113 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700114 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800115 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800116 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700117 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800118 CreateVfioDevice(devices::vfio::VfioError),
Michael Hoylee392c462020-10-07 03:29:24 -0700119 CreateWaitContext(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700120 DeviceJail(minijail::Error),
121 DevicePivotRoot(minijail::Error),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800122 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700123 DiskImageLock(base::Error),
124 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900125 FsDeviceNew(virtio::fs::Error),
126 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700127 GetSignalMask(signal::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800128 InputDeviceNew(virtio::InputError),
129 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700130 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700131 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700132 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800133 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800134 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800135 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700136 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800137 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700138 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800139 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800140 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800141 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800142 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900143 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800144 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700145 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700146 PmemDeviceNew(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900147 ReadMemAvailable(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700148 RegisterBalloon(arch::DeviceRegistrationError),
149 RegisterBlock(arch::DeviceRegistrationError),
150 RegisterGpu(arch::DeviceRegistrationError),
151 RegisterNet(arch::DeviceRegistrationError),
152 RegisterP9(arch::DeviceRegistrationError),
153 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700154 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700155 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700156 ReserveGpuMemory(base::MmapError),
157 ReserveMemory(base::Error),
158 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700159 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800160 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700161 RunnableVcpu(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700162 SettingGidMap(minijail::Error),
163 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700164 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700165 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700166 SignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800167 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700168 Timer(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700169 ValidateRawFd(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800170 VhostNetDeviceNew(virtio::vhost::Error),
171 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700172 VirtioPciDev(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700173 WaitContextAdd(base::Error),
174 WaitContextDelete(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700175 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800176}
177
David Tolnayc69f9752019-03-01 18:07:56 -0800178impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700179 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800180 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800181 use self::Error::*;
182
David Tolnay3df35522019-03-11 12:36:30 -0700183 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800184 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800185 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700186 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700187 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800188 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700189 AllocatePmemDeviceAddress(e) => {
190 write!(f, "failed to allocate memory for pmem device: {}", e)
191 }
David Tolnayc69f9752019-03-01 18:07:56 -0800192 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
193 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
194 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800195 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800196 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700197 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700198 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
199 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000200 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800201 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700202 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700203 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700204 CreateEvent(e) => write!(f, "failed to create event: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800205 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
206 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
207 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700208 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800209 CreateTpmStorage(p, e) => {
210 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
211 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800212 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700213 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800214 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700215 CreateWaitContext(e) => write!(f, "failed to create wait context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800216 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
217 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800218 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800219 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800220 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900221 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
222 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700223 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700224 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
225 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800226 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
227 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800228 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800229 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800230 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800231 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700232 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800233 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800234 f,
235 "failed to open android fstab file {}: {}",
236 p.display(),
237 e
238 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700239 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700240 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
241 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800242 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800243 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900244 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800245 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700246 PmemDeviceImageTooBig => {
247 write!(f, "failed to create pmem device: pmem device image too big")
248 }
249 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900250 ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800251 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
252 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
253 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
254 RegisterNet(e) => write!(f, "error registering net device: {}", e),
255 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
256 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
257 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
258 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800259 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
260 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700261 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700262 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800263 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700264 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800265 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900266 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700267 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800268 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
269 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
270 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700271 Timer(e) => write!(f, "failed to read timer fd: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800272 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
273 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
274 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
275 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700276 WaitContextAdd(e) => write!(f, "failed to add descriptor to wait context: {}", e),
277 WaitContextDelete(e) => {
278 write!(f, "failed to remove descriptor from wait context: {}", e)
279 }
David Tolnayc69f9752019-03-01 18:07:56 -0800280 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800281 }
282 }
283}
284
Allen Webbf3024c82020-06-19 07:19:48 -0700285impl From<minijail::Error> for Error {
286 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800287 Error::IoJail(err)
288 }
289}
290
David Tolnayc69f9752019-03-01 18:07:56 -0800291impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700292
Zach Reizner39aa26b2017-12-12 18:03:23 -0800293type Result<T> = std::result::Result<T, Error>;
294
Jakub Starond99cd0a2019-04-11 14:09:39 -0700295enum TaggedControlSocket {
296 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700297 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800298 VmIrq(VmIrqResponseSocket),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800299 VmMsync(VmMsyncResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700300}
301
302impl AsRef<UnixSeqpacket> for TaggedControlSocket {
303 fn as_ref(&self) -> &UnixSeqpacket {
304 use self::TaggedControlSocket::*;
305 match &self {
Chirantan Ekbote50582532020-01-16 16:49:14 +0900306 Vm(ref socket) => socket.as_ref(),
307 VmMemory(ref socket) => socket.as_ref(),
308 VmIrq(ref socket) => socket.as_ref(),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800309 VmMsync(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700310 }
311 }
312}
313
Michael Hoylee392c462020-10-07 03:29:24 -0700314impl AsRawDescriptor for TaggedControlSocket {
315 fn as_raw_descriptor(&self) -> RawDescriptor {
Jakub Starond99cd0a2019-04-11 14:09:39 -0700316 self.as_ref().as_raw_fd()
317 }
318}
319
Andrew Walbranf50bab62020-07-07 13:22:53 +0100320fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900321 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900322
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900323 // Safe because this will only modify `buf` and we check the return value.
324 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
325 if res == 0 {
326 // Safe because the kernel guarantees that the struct is fully initialized.
327 let limit = unsafe { buf.assume_init() };
328 Ok(limit.rlim_max)
329 } else {
330 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
331 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900332}
333
/// Sandboxing options consumed by `create_base_minijail`.
struct SandboxConfig<'a> {
    /// When set, the jail is created with an empty capability mask (`use_caps(0)`).
    limit_caps: bool,
    /// When set, seccomp failures are logged and the textual `.policy` file is
    /// used even if a pre-compiled `.bpf` file exists.
    log_failures: bool,
    /// Base path of the seccomp policy; its extension is replaced with `bpf` or
    /// `policy` when the filter is loaded.
    seccomp_policy: &'a Path,
    /// Optional uid map string handed to the jail's `uidmap`.
    uid_map: Option<&'a str>,
    /// Optional gid map string handed to the jail's `gidmap`.
    gid_map: Option<&'a str>,
}
341
Zach Reizner44863792019-06-26 14:22:08 -0700342fn create_base_minijail(
343 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800344 r_limit: Option<u64>,
345 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700346) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800347 // All child jails run in a new user namespace without any users mapped,
348 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800349 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800350
351 if let Some(config) = config {
352 j.namespace_pids();
353 j.namespace_user();
354 j.namespace_user_disable_setgroups();
355 if config.limit_caps {
356 // Don't need any capabilities.
357 j.use_caps(0);
358 }
359 if let Some(uid_map) = config.uid_map {
360 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
361 }
362 if let Some(gid_map) = config.gid_map {
363 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
364 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900365 // Run in a new mount namespace.
366 j.namespace_vfs();
367
Matt Delcoc24ad782020-02-14 13:24:36 -0800368 // Run in an empty network namespace.
369 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900370
371 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800372 j.no_new_privs();
373
374 // By default we'll prioritize using the pre-compiled .bpf over the .policy
375 // file (the .bpf is expected to be compiled using "trap" as the failure
376 // behavior instead of the default "kill" behavior).
377 // Refer to the code comment for the "seccomp-log-failures"
378 // command-line parameter for an explanation about why the |log_failures|
379 // flag forces the use of .policy files (and the build-time alternative to
380 // this run-time flag).
381 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
382 if bpf_policy_file.exists() && !config.log_failures {
383 j.parse_seccomp_program(&bpf_policy_file)
384 .map_err(Error::DeviceJail)?;
385 } else {
386 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
387 // which will correctly kill the entire device process if a worker
388 // thread commits a seccomp violation.
389 j.set_seccomp_filter_tsync();
390 if config.log_failures {
391 j.log_seccomp_filter_failures();
392 }
393 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
394 .map_err(Error::DeviceJail)?;
395 }
396 j.use_seccomp_filter();
397 // Don't do init setup.
398 j.run_as_init();
399 }
400
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900401 // Only pivot_root if we are not re-using the current root directory.
402 if root != Path::new("/") {
403 // It's safe to call `namespace_vfs` multiple times.
404 j.namespace_vfs();
405 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
406 }
Matt Delco45caf912019-11-13 08:11:09 -0800407
Matt Delcoc24ad782020-02-14 13:24:36 -0800408 // Most devices don't need to open many fds.
409 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
410 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
411 .map_err(Error::SettingMaxOpenFiles)?;
412
Zach Reizner39aa26b2017-12-12 18:03:23 -0800413 Ok(j)
414}
415
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800416fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700417 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800418 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
419 // A directory for a jailed device's pivot root.
420 let root_path = Path::new(pivot_root);
421 if !root_path.exists() {
422 return Err(Error::PivotRootDoesntExist(pivot_root));
423 }
424 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800425 let config = SandboxConfig {
426 limit_caps: true,
427 log_failures: cfg.seccomp_log_failures,
428 seccomp_policy: &policy_path,
429 uid_map: None,
430 gid_map: None,
431 };
432 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800433 } else {
434 Ok(None)
435 }
436}
437
David Tolnayfd0971d2019-03-04 17:15:57 -0800438type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800439
440fn create_block_device(
441 cfg: &Config,
442 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700443 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800444) -> DeviceResult {
445 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
446 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
447 // Safe because we will validate |raw_fd|.
448 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
449 } else {
450 OpenOptions::new()
451 .read(true)
452 .write(!disk.read_only)
453 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800454 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800455 };
456 // Lock the disk image to prevent other crosvm instances from using it.
457 let lock_op = if disk.read_only {
458 FlockOperation::LockShared
459 } else {
460 FlockOperation::LockExclusive
461 };
462 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
463
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700464 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800465 let dev = virtio::Block::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100466 virtio::base_features(cfg.protected_vm),
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800467 disk_file,
468 disk.read_only,
469 disk.sparse,
Daniel Verkamp27672232019-12-06 17:26:55 +1100470 disk.block_size,
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800471 Some(disk_device_socket),
472 )
473 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800474
475 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700476 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800477 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800478 })
479}
480
481fn create_rng_device(cfg: &Config) -> DeviceResult {
482 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
483
484 Ok(VirtioDeviceStub {
485 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800486 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800487 })
488}
489
/// Builds the virtio TPM device and, when sandboxing is enabled, its jail.
///
/// With a jail, storage lives in `/run/vm/tpm.<pid>`: the directory is created,
/// chowned to the ids returned by `add_crosvm_user_to_jail`, and bind-mounted
/// into the jail. Without a jail, `/tmp/tpm-simulator` is used.
///
/// # Errors
///
/// Returns `CreateTpmStorage` or `ChownTpmStorage` for the storage directory
/// steps; minijail mount failures surface as `IoJail` through `?`. Errors from
/// `simple_jail` and `add_crosvm_user_to_jail` are propagated as-is.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use base::chown;
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = if let Some(jail) = tpm_jail.as_mut() {
        // Mount a tmpfs over the device's root directory for the tpm simulator.
        // It is capped at 20480 bytes (20 * 1024, i.e. 20 KB).
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=20480",
        )?;

        let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

        let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
        let storage_dir = PathBuf::from(&tpm_pid_dir);
        fs::create_dir_all(&storage_dir)
            .map_err(|e| Error::CreateTpmStorage(storage_dir.clone(), e))?;
        let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
        chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
            .map_err(Error::ChownTpmStorage)?;

        jail.mount_bind(&storage_dir, &storage_dir, true)?;
        storage_dir
    } else {
        // Path used inside cros_sdk, which does not have /run/vm.
        PathBuf::from("/tmp/tpm-simulator")
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
538
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800539fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800540 let socket = single_touch_spec
541 .get_path()
542 .into_unix_stream()
543 .map_err(|e| {
544 error!("failed configuring virtio single touch: {:?}", e);
545 e
546 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800547
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800548 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700549 let dev = virtio::new_single_touch(
550 socket,
551 width,
552 height,
553 virtio::base_features(cfg.protected_vm),
554 )
555 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800556 Ok(VirtioDeviceStub {
557 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800558 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800559 })
560}
561
562fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800563 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800564 error!("failed configuring virtio trackpad: {}", e);
565 e
566 })?;
567
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800568 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700569 let dev = virtio::new_trackpad(
570 socket,
571 width,
572 height,
573 virtio::base_features(cfg.protected_vm),
574 )
575 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800576
577 Ok(VirtioDeviceStub {
578 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800579 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800580 })
581}
582
Zach Reizner65b98f12019-11-22 17:34:58 -0800583fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
584 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800585 error!("failed configuring virtio mouse: {}", e);
586 e
587 })?;
588
Noah Goldd4ca29b2020-10-27 12:21:52 -0700589 let dev = virtio::new_mouse(socket, virtio::base_features(cfg.protected_vm))
590 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800591
592 Ok(VirtioDeviceStub {
593 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800594 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800595 })
596}
597
Zach Reizner65b98f12019-11-22 17:34:58 -0800598fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
599 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800600 error!("failed configuring virtio keyboard: {}", e);
601 e
602 })?;
603
Noah Goldd4ca29b2020-10-27 12:21:52 -0700604 let dev = virtio::new_keyboard(socket, virtio::base_features(cfg.protected_vm))
605 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800606
607 Ok(VirtioDeviceStub {
608 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800609 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800610 })
611}
612
613fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
614 let dev_file = OpenOptions::new()
615 .read(true)
616 .write(true)
617 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800618 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800619
Noah Goldd4ca29b2020-10-27 12:21:52 -0700620 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
621 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800622
623 Ok(VirtioDeviceStub {
624 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800625 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800626 })
627}
628
Jakub Staron1f828d72019-04-11 12:49:29 -0700629fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100630 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), socket)
631 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800632
633 Ok(VirtioDeviceStub {
634 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800635 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800636 })
637}
638
639fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
640 // Safe because we ensure that we get a unique handle to the fd.
641 let tap = unsafe {
642 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
643 .map_err(Error::CreateTapDevice)?
644 };
645
Xiong Zhang773c7072020-03-20 10:39:55 +0800646 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
647 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700648 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800649 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
650 vq_pairs = 1;
651 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100652 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100653 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800654
655 Ok(VirtioDeviceStub {
656 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800657 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800658 })
659}
660
661fn create_net_device(
662 cfg: &Config,
663 host_ip: Ipv4Addr,
664 netmask: Ipv4Addr,
665 mac_address: MacAddress,
666 mem: &GuestMemory,
667) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800668 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
669 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700670 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800671 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
672 vq_pairs = 1;
673 }
674
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100675 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800676 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100677 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
678 features,
679 host_ip,
680 netmask,
681 mac_address,
682 mem,
683 )
684 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800685 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800686 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100687 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800688 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800689 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800690 };
691
692 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800693 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800694 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800695 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800696 };
697
698 Ok(VirtioDeviceStub {
699 dev,
700 jail: simple_jail(&cfg, policy)?,
701 })
702}
703
/// Creates the virtio GPU device and, when sandboxing yields a jail, populates
/// that jail with the host paths the device is given access to.
///
/// Display backends are offered in this order: Wayland (only when
/// `wayland_socket_path` is provided), X, then the stub backend. Inside a
/// sandbox the Wayland socket is reached at `/wayland-0`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    let mut display_backends = Vec::new();
    if let Some(socket_path) = wayland_socket_path {
        // A sandboxed device sees the socket at its bind-mounted location.
        let backend_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            socket_path.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(backend_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let gpu = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEvent)?,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
    );

    let mut jail = simple_jail(cfg, "gpu_device")?;
    if let Some(sandbox) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        sandbox.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sys_dir in &["/sys/dev/char", "/sys/devices"] {
            let sys_path = Path::new(sys_dir);
            sandbox.mount_bind(sys_path, sys_path, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            sandbox.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        let gpu_params = cfg.gpu_parameters.as_ref();
        if let Some(cache_dir) = gpu_params.and_then(|params| params.cache_path.as_ref()) {
            let on_arm = cfg!(any(target_arch = "arm", target_arch = "aarch64"));
            if on_arm && cfg.sandbox {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) = gpu_params.and_then(|params| params.cache_size.as_ref())
                {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_path = Path::new(cache_dir);
                sandbox.mount_bind(shadercache_path, shadercache_path, true)?;
            }
        }

        // If the ARM specific devices exist on the host, bind mount them in.
        for arm_dev in &["/dev/mali0", "/dev/pvr_sync"] {
            let arm_dev_path = Path::new(arm_dev);
            if arm_dev_path.exists() {
                sandbox.mount_bind(arm_dev_path, arm_dev_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        for lib_dir in &["/usr/lib", "/usr/lib64", "/lib", "/lib64"] {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                sandbox.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(host_socket) = wayland_socket_path {
            sandbox.mount_bind(host_socket, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(sandbox, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        sandbox.mount(
            proc_path,
            proc_path,
            "proc",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
        )?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail,
    })
}
840
841fn create_wayland_device(
842 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700843 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800844 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
845) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900846 let wayland_socket_dirs = cfg
847 .wayland_socket_paths
848 .iter()
849 .map(|(_name, path)| path.parent())
850 .collect::<Option<Vec<_>>>()
851 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800852
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100853 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100854 let dev = virtio::Wl::new(
855 features,
856 cfg.wayland_socket_paths.clone(),
857 socket,
858 resource_bridge,
859 )
860 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800861
Matt Delco45caf912019-11-13 08:11:09 -0800862 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800863 Some(mut jail) => {
864 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
865 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
866 jail.mount_with_data(
867 Path::new("none"),
868 Path::new("/"),
869 "tmpfs",
870 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
871 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800872 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800873
874 // Bind mount the wayland socket's directory into jail's root. This is necessary since
875 // each new wayland context must open() the socket. If the wayland socket is ever
876 // destroyed and remade in the same host directory, new connections will be possible
877 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900878 for dir in &wayland_socket_dirs {
879 jail.mount_bind(dir, dir, true)?;
880 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800881 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
882
883 Some(jail)
884 }
885 None => None,
886 };
887
888 Ok(VirtioDeviceStub {
889 dev: Box::new(dev),
890 jail,
891 })
892}
893
/// Creates a virtio video device of the given type (decoder or encoder).
///
/// When a jail is in use, the matching crosvm user is added to it and the host
/// paths listed below are bind mounted into it. The device is handed
/// `resource_bridge`.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: virtio::resource_bridge::ResourceRequestSocket,
) -> DeviceResult {
    let mut jail = simple_jail(cfg, "video_device")?;
    if let Some(sandbox) = jail.as_mut() {
        let feature_name = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(sandbox, feature_name)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        sandbox.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        sandbox.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices,
            // followed by the directory the dri libraries are loaded from by
            // minigbm on AMD devices.
            for host_dir in &["/sys/dev/char", "/sys/devices", "/usr/lib64"] {
                let host_path = Path::new(host_dir);
                sandbox.mount_bind(host_path, host_path, false)?;
            }
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        sandbox.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        sandbox.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail,
    })
}
957
/// Creates a resource bridge socket pair, appends the GPU-side end to
/// `resource_bridges`, and appends a new video device of type `typ`, holding the
/// other end, to `devs`.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    resource_bridges: &mut Vec<virtio::resource_bridge::ResourceResponseSocket>,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let (video_end, gpu_end) = virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
    resource_bridges.push(gpu_end);

    let video_stub = create_video_device(cfg, typ, video_end)?;
    devs.push(video_stub);
    Ok(())
}
971
David Tolnay2b089fc2019-03-04 15:33:22 -0800972fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100973 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100974 let dev = virtio::vhost::Vsock::new(features, cid, mem).map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800975
976 Ok(VirtioDeviceStub {
977 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800978 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800979 })
980}
981
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900982fn create_fs_device(
983 cfg: &Config,
984 uid_map: &str,
985 gid_map: &str,
986 src: &Path,
987 tag: &str,
988 fs_cfg: virtio::fs::passthrough::Config,
989) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900990 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800991 let j = if cfg.sandbox {
992 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
993 let config = SandboxConfig {
994 limit_caps: false,
995 uid_map: Some(uid_map),
996 gid_map: Some(gid_map),
997 log_failures: cfg.seccomp_log_failures,
998 seccomp_policy: &seccomp_policy,
999 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001000 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1001 // We want bind mounts from the parent namespaces to propagate into the fs device's
1002 // namespace.
1003 jail.set_remount_mode(libc::MS_SLAVE);
1004
1005 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001006 } else {
1007 create_base_minijail(src, Some(max_open_files), None)?
1008 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001009
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001010 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001011 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1012 // when num_queues > 1.
Will Deacon81d5adb2020-10-06 18:37:48 +01001013 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001014
1015 Ok(VirtioDeviceStub {
1016 dev: Box::new(dev),
1017 jail: Some(j),
1018 })
1019}
1020
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001021fn create_9p_device(
1022 cfg: &Config,
1023 uid_map: &str,
1024 gid_map: &str,
1025 src: &Path,
1026 tag: &str,
1027) -> DeviceResult {
1028 let max_open_files = get_max_open_files()?;
1029 let (jail, root) = if cfg.sandbox {
1030 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1031 let config = SandboxConfig {
1032 limit_caps: false,
1033 uid_map: Some(uid_map),
1034 gid_map: Some(gid_map),
1035 log_failures: cfg.seccomp_log_failures,
1036 seccomp_policy: &seccomp_policy,
1037 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001038
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001039 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1040 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1041 // namespace.
1042 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001043
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001044 // The shared directory becomes the root of the device's file system.
1045 let root = Path::new("/");
1046 (Some(jail), root)
1047 } else {
1048 // There's no mount namespace so we tell the server to treat the source directory as the
1049 // root.
1050 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001051 };
1052
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001053 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +01001054 let dev = virtio::P9::new(features, root, tag).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001055
1056 Ok(VirtioDeviceStub {
1057 dev: Box::new(dev),
1058 jail,
1059 })
1060}
1061
Jakub Starona3411ea2019-04-24 10:55:25 -07001062fn create_pmem_device(
1063 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001064 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001065 resources: &mut SystemAllocator,
1066 disk: &DiskOption,
1067 index: usize,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001068 pmem_device_socket: VmMsyncRequestSocket,
Jakub Starona3411ea2019-04-24 10:55:25 -07001069) -> DeviceResult {
1070 let fd = OpenOptions::new()
1071 .read(true)
1072 .write(!disk.read_only)
1073 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001074 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001075
Iliyan Malcheved149862020-04-17 23:57:47 +00001076 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001077 let metadata =
1078 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001079 let disk_len = metadata.len();
1080 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1081 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1082 // we just align the size of the file to 2 MiB then access beyond the last page of the
1083 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1084 // padding up to 2 MiB.
1085 let alignment = 2 * 1024 * 1024;
1086 let align_adjust = if disk_len % alignment != 0 {
1087 alignment - (disk_len % alignment)
1088 } else {
1089 0
1090 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001091 disk_len
1092 .checked_add(align_adjust)
1093 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001094 };
1095
1096 let protection = {
1097 if disk.read_only {
1098 Protection::read()
1099 } else {
1100 Protection::read_write()
1101 }
1102 };
1103
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001104 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001105 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001106 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001107
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001108 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1109 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001110 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001111 .map_err(Error::ReservePmemMemory)?;
1112 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001113 };
1114
1115 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001116 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001117 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001118 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001119 Alloc::PmemDevice(index),
1120 format!("pmem_disk_image_{}", index),
1121 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1122 128 * 1024 * 1024, /* 128 MiB */
1123 )
1124 .map_err(Error::AllocatePmemDeviceAddress)?;
1125
Daniel Verkampe1980a92020-02-07 11:00:55 -08001126 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001127 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001128 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001129 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001130 /* read_only = */ disk.read_only,
1131 /* log_dirty_pages = */ false,
1132 )
1133 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001134
Daniel Verkampe1980a92020-02-07 11:00:55 -08001135 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001136 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001137 fd,
1138 GuestAddress(mapping_address),
1139 slot,
1140 arena_size,
1141 Some(pmem_device_socket),
1142 )
1143 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001144
1145 Ok(VirtioDeviceStub {
1146 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001147 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001148 })
1149}
1150
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001151fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
1152 let mut keep_fds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001153 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001154 let dev = param
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001155 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_fds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001156 .map_err(Error::CreateConsole)?;
1157
Nicholas Verne71e73d82020-07-08 17:19:55 +10001158 let jail = match simple_jail(&cfg, "serial")? {
1159 Some(mut jail) => {
1160 // Create a tmpfs in the device's root directory so that we can bind mount the
1161 // log socket directory into it.
1162 // The size=67108864 is size=64*1024*1024 or size=64MB.
1163 jail.mount_with_data(
1164 Path::new("none"),
1165 Path::new("/"),
1166 "tmpfs",
1167 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1168 "size=67108864",
1169 )?;
1170 add_crosvm_user_to_jail(&mut jail, "serial")?;
1171 let res = param.add_bind_mounts(&mut jail);
1172 if res.is_err() {
1173 error!("failed to add bind mounts for console device");
1174 }
1175 Some(jail)
1176 }
1177 None => None,
1178 };
1179
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001180 Ok(VirtioDeviceStub {
1181 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001182 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001183 })
1184}
1185
// Creates every virtio device requested by `cfg` and returns them as `VirtioDeviceStub`s
// (a boxed device plus its jail). Devices are appended to the returned vector in the order
// they appear below; any creation failure aborts the whole function via `?`.
//
// gpu_device_socket is not used when GPU support is disabled.
#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
fn create_virtio_devices(
    cfg: &Config,
    mem: &GuestMemory,
    vm: &mut impl Vm,
    resources: &mut SystemAllocator,
    _exit_evt: &Event,
    wayland_device_socket: VmMemoryControlRequestSocket,
    gpu_device_socket: VmMemoryControlRequestSocket,
    balloon_device_socket: BalloonControlResponseSocket,
    disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
    pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // One console device per serial parameter whose hardware type is virtio-console.
    for (_, param) in cfg
        .serial_parameters
        .iter()
        .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
    {
        let dev = create_console_device(cfg, param)?;
        devs.push(dev);
    }

    // Each disk takes the socket at the front of `disk_device_sockets`.
    // NOTE(review): `remove(0)` panics on an empty vector, so this assumes the caller
    // supplied at least one socket per configured disk -- confirm against the caller.
    for disk in &cfg.disks {
        let disk_device_socket = disk_device_sockets.remove(0);
        devs.push(create_block_device(cfg, disk, disk_device_socket)?);
    }

    // Same front-of-the-vector pairing for pmem devices; `index` is the position of the
    // device within `cfg.pmem_devices`.
    for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
        let pmem_device_socket = pmem_device_sockets.remove(0);
        devs.push(create_pmem_device(
            cfg,
            vm,
            resources,
            pmem_disk,
            index,
            pmem_device_socket,
        )?);
    }

    // The rng device is created unconditionally.
    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "tpm")]
    {
        if cfg.software_tpm {
            devs.push(create_tpm_device(cfg)?);
        }
    }

    // Optional input devices, each created only when configured.
    if let Some(single_touch_spec) = &cfg.virtio_single_touch {
        devs.push(create_single_touch_device(cfg, single_touch_spec)?);
    }

    if let Some(trackpad_spec) = &cfg.virtio_trackpad {
        devs.push(create_trackpad_device(cfg, trackpad_spec)?);
    }

    if let Some(mouse_socket) = &cfg.virtio_mouse {
        devs.push(create_mouse_device(cfg, mouse_socket)?);
    }

    if let Some(keyboard_socket) = &cfg.virtio_keyboard {
        devs.push(create_keyboard_device(cfg, keyboard_socket)?);
    }

    for dev_path in &cfg.virtio_input_evdevs {
        devs.push(create_vinput_device(cfg, dev_path)?);
    }

    // The balloon device is created unconditionally.
    devs.push(create_balloon_device(cfg, balloon_device_socket)?);

    // One net device per tap fd listed in the config.
    for tap_fd in &cfg.tap_fd {
        devs.push(create_tap_net_device(cfg, *tap_fd)?);
    }

    // A net device is created only when host IP, netmask and MAC address are all set.
    // Original note: "We checked above that if the IP is defined, then the netmask is, too."
    // NOTE(review): that check is not in this function -- presumably done during config
    // validation; confirm.
    if let (Some(host_ip), Some(netmask), Some(mac_address)) =
        (cfg.host_ip, cfg.netmask, cfg.mac_address)
    {
        devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
    }

    // Response ends of resource bridges; filled in below and finally handed to the GPU
    // device. Only mutated when one of the gpu/video features is compiled in.
    #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
    let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();

    if !cfg.wayland_socket_paths.is_empty() {
        #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
        let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;

        #[cfg(feature = "gpu")]
        {
            // With a GPU configured, connect wayland and GPU through a resource bridge:
            // the request end goes to the wayland device, the response end to the GPU.
            if cfg.gpu_parameters.is_some() {
                let (wl_socket, gpu_socket) =
                    virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
                resource_bridges.push(gpu_socket);
                wl_resource_bridge = Some(wl_socket);
            }
        }

        devs.push(create_wayland_device(
            cfg,
            wayland_device_socket,
            wl_resource_bridge,
        )?);
    }

    #[cfg(feature = "video-decoder")]
    {
        if cfg.video_dec {
            register_video_device(
                &mut devs,
                &mut resource_bridges,
                cfg,
                devices::virtio::VideoDeviceType::Decoder,
            )?;
        }
    }

    #[cfg(feature = "video-encoder")]
    {
        if cfg.video_enc {
            register_video_device(
                &mut devs,
                &mut resource_bridges,
                cfg,
                devices::virtio::VideoDeviceType::Encoder,
            )?;
        }
    }

    #[cfg(feature = "gpu")]
    {
        if let Some(gpu_parameters) = &cfg.gpu_parameters {
            // Event devices are socket pairs: one end backs a virtio input device pushed
            // to `devs`, the other end is passed to the GPU device as an `EventDevice`.
            let mut event_devices = Vec::new();
            if cfg.display_window_mouse {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().map_err(Error::CreateSocket)?;
                // Use the configured single-touch size if there is one, otherwise fall
                // back to the GPU display dimensions.
                let (single_touch_width, single_touch_height) = cfg
                    .virtio_single_touch
                    .as_ref()
                    .map(|single_touch_spec| single_touch_spec.get_size())
                    .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
                let dev = virtio::new_single_touch(
                    virtio_dev_socket,
                    single_touch_width,
                    single_touch_height,
                    virtio::base_features(cfg.protected_vm),
                )
                .map_err(Error::InputDeviceNew)?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(&cfg, "input_device")?,
                });
                event_devices.push(EventDevice::touchscreen(event_device_socket));
            }
            if cfg.display_window_keyboard {
                let (event_device_socket, virtio_dev_socket) =
                    UnixStream::pair().map_err(Error::CreateSocket)?;
                let dev = virtio::new_keyboard(
                    virtio_dev_socket,
                    virtio::base_features(cfg.protected_vm),
                )
                .map_err(Error::InputDeviceNew)?;
                devs.push(VirtioDeviceStub {
                    dev: Box::new(dev),
                    jail: simple_jail(&cfg, "input_device")?,
                });
                event_devices.push(EventDevice::keyboard(event_device_socket));
            }
            devs.push(create_gpu_device(
                cfg,
                _exit_evt,
                gpu_device_socket,
                resource_bridges,
                // Use the unnamed socket for GPU display screens.
                cfg.wayland_socket_paths.get(""),
                cfg.x_display.clone(),
                event_devices,
                map_request,
            )?);
        }
    }

    if let Some(cid) = cfg.cid {
        devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
    }

    // One device per shared directory; the directory's `kind` selects virtio-fs or 9p.
    for shared_dir in &cfg.shared_dirs {
        let SharedDir {
            src,
            tag,
            kind,
            uid_map,
            gid_map,
            cfg: fs_cfg,
        } = shared_dir;

        let dev = match kind {
            SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
            SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag)?,
        };
        devs.push(dev);
    }

    Ok(devs)
}
1395
1396fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001397 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001398 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001399 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001400 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001401 exit_evt: &Event,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001402 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001403 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001404 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001405 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001406 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001407 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001408 usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001409 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001410) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001411 let stubs = create_virtio_devices(
1412 &cfg,
1413 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001414 vm,
1415 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001416 exit_evt,
1417 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001418 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001419 balloon_device_socket,
1420 disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001421 pmem_device_sockets,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001422 map_request,
David Tolnay2b089fc2019-03-04 15:33:22 -08001423 )?;
1424
1425 let mut pci_devices = Vec::new();
1426
1427 for stub in stubs {
Daniel Verkampbb712d62019-11-19 09:47:33 -08001428 let (msi_host_socket, msi_device_socket) =
1429 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1430 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1431 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
1432 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001433 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001434 pci_devices.push((dev, stub.jail));
1435 }
1436
Andrew Scull1590e6f2020-03-18 18:00:47 +00001437 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001438 for ac97_param in &cfg.ac97_parameters {
1439 let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
paulhsiace17e6e2020-08-28 18:37:45 +08001440 let jail = simple_jail(&cfg, dev.minijail_policy())?;
1441 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001442 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001443
Jingkui Wang100e6e42019-03-08 20:41:57 -08001444 // Create xhci controller.
1445 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001446 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001447
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001448 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001449 let vfio_container = Arc::new(Mutex::new(
1450 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1451 ));
1452
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001453 for vfio_path in &cfg.vfio {
Daniel Verkamp10154a92020-09-28 17:44:40 -07001454 // create MSI, MSI-X, and Mem request sockets for each vfio device
1455 let (vfio_host_socket_msi, vfio_device_socket_msi) =
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001456 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
Daniel Verkamp10154a92020-09-28 17:44:40 -07001457 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msi));
1458
1459 let (vfio_host_socket_msix, vfio_device_socket_msix) =
1460 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1461 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msix));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001462
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001463 let (vfio_host_socket_mem, vfio_device_socket_mem) =
1464 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>()
1465 .map_err(Error::CreateSocket)?;
1466 control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001467
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001468 let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
1469 .map_err(Error::CreateVfioDevice)?;
1470 let vfiopcidevice = Box::new(VfioPciDevice::new(
1471 vfiodevice,
Daniel Verkamp10154a92020-09-28 17:44:40 -07001472 vfio_device_socket_msi,
1473 vfio_device_socket_msix,
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001474 vfio_device_socket_mem,
1475 ));
1476 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1477 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001478 }
1479
David Tolnay2b089fc2019-03-04 15:33:22 -08001480 Ok(pci_devices)
1481}
1482
// A uid/gid pair, as returned by `add_crosvm_user_to_jail`.
// The dead_code allowance applies when the "tpm" feature is disabled.
#[derive(Copy, Clone)]
#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
struct Ids {
    uid: uid_t,
    gid: gid_t,
}
1489
David Tolnay48c48292019-03-01 16:54:25 -08001490// Set the uid/gid for the jailed process and give a basic id map. This is
1491// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001492fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001493 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1494
1495 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1496 Ok(u) => u,
1497 Err(e) => {
1498 warn!("falling back to current user id for {}: {}", feature, e);
1499 geteuid()
1500 }
1501 };
1502
1503 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1504 Ok(u) => u,
1505 Err(e) => {
1506 warn!("falling back to current group id for {}: {}", feature, e);
1507 getegid()
1508 }
1509 };
1510
1511 jail.change_uid(crosvm_uid);
1512 jail.change_gid(crosvm_gid);
1513 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1514 .map_err(Error::SettingUidMap)?;
1515 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1516 .map_err(Error::SettingGidMap)?;
1517
David Tolnay41a6f842019-03-01 16:18:44 -08001518 Ok(Ids {
1519 uid: crosvm_uid,
1520 gid: crosvm_gid,
1521 })
David Tolnay48c48292019-03-01 16:54:25 -08001522}
1523
David Tolnayfd0971d2019-03-04 17:15:57 -08001524fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001525 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001526 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001527 }
1528 let raw_fd = path
1529 .file_name()
1530 .and_then(|fd_osstr| fd_osstr.to_str())
1531 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1532 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001533 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001534}
1535
// Conversion of a value into a `UnixStream`, consuming the value.
trait IntoUnixStream {
    // Performs the conversion; returns an error if no stream could be obtained.
    fn into_unix_stream(self) -> Result<UnixStream>;
}
1539
1540impl<'a> IntoUnixStream for &'a Path {
1541 fn into_unix_stream(self) -> Result<UnixStream> {
1542 if self.parent() == Some(Path::new("/proc/self/fd")) {
1543 // Safe because we will validate |raw_fd|.
1544 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(self)?)) }
1545 } else {
1546 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1547 }
1548 }
1549}
1550impl<'a> IntoUnixStream for &'a PathBuf {
1551 fn into_unix_stream(self) -> Result<UnixStream> {
1552 self.as_path().into_unix_stream()
1553 }
1554}
1555
// An existing stream converts to itself; this never fails.
impl IntoUnixStream for UnixStream {
    fn into_unix_stream(self) -> Result<UnixStream> {
        Ok(self)
    }
}
1561
Steven Richmanf32d0b42020-06-20 21:45:32 -07001562fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1563 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001564 unsafe {
1565 extern "C" fn handle_signal() {}
1566 // Our signal handler does nothing and is trivially async signal safe.
1567 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1568 .map_err(Error::RegisterSignalHandler)?;
1569 }
1570 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1571 } else {
1572 unsafe {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001573 extern "C" fn handle_signal<T: Vcpu>() {
1574 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001575 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001576 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001577 .map_err(Error::RegisterSignalHandler)?;
1578 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001579 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001580 Ok(())
1581}
1582
// The VM run mode shared with the VCPU threads, paired with the condition variable those
// threads wait on while the mode is not `Running` (see the wait loop in `run_vcpu`).
#[derive(Default)]
struct VcpuRunMode {
    // Current run mode, guarded by the mutex.
    mtx: Mutex<VmRunMode>,
    // Notified whenever the mode is changed through `set_and_notify`.
    cvar: Condvar,
}
1588
1589impl VcpuRunMode {
1590 fn set_and_notify(&self, new_mode: VmRunMode) {
1591 *self.mtx.lock() = new_mode;
1592 self.cvar.notify_all();
1593 }
1594}
1595
Steven Richmanf32d0b42020-06-20 21:45:32 -07001596// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001597fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001598 cpu_id: usize,
1599 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001600 vm: impl VmArch,
1601 irq_chip: &mut impl IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001602 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001603 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001604 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001605 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001606 has_bios: bool,
1607 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001608) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001609where
Zach Reizner2c770e62020-09-30 16:49:59 -07001610 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001611{
Zach Reizner304e7312020-09-29 16:00:24 -07001612 let mut vcpu = match vcpu {
1613 Some(v) => v,
1614 None => {
1615 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1616 // the vcpu thread.
1617 match vm
1618 .create_vcpu(cpu_id)
1619 .map_err(Error::CreateVcpu)?
1620 .downcast::<V>()
1621 {
1622 Ok(v) => *v,
1623 Err(_) => panic!("VM created wrong type of VCPU"),
1624 }
1625 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001626 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001627
Steven Richmanf32d0b42020-06-20 21:45:32 -07001628 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001629 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001630 .map_err(Error::AddIrqChipVcpu)?;
1631
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001632 if !vcpu_affinity.is_empty() {
1633 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1634 error!("Failed to set CPU affinity: {}", e);
1635 }
1636 }
1637
Steven Richmanf32d0b42020-06-20 21:45:32 -07001638 Arch::configure_vcpu(
1639 vm.get_memory(),
1640 vm.get_hypervisor(),
1641 irq_chip,
1642 &mut vcpu,
1643 cpu_id,
1644 vcpu_count,
1645 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001646 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001647 )
1648 .map_err(Error::ConfigureVcpu)?;
1649
Steven Richmanf32d0b42020-06-20 21:45:32 -07001650 #[cfg(feature = "chromeos")]
1651 if let Err(e) = base::sched::enable_core_scheduling() {
1652 error!("Failed to enable core scheduling: {}", e);
1653 }
1654
Kansho Nishidaab205af2020-08-13 18:17:50 +09001655 if run_rt {
1656 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1657 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1658 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1659 {
1660 warn!("Failed to set vcpu to real time: {}", e);
1661 }
1662 }
1663
Steven Richmanf32d0b42020-06-20 21:45:32 -07001664 if use_hypervisor_signals {
1665 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1666 v.retain(|&x| x != SIGRTMIN() + 0);
1667 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1668 }
1669
Zach Reizner2c770e62020-09-30 16:49:59 -07001670 let vcpu_run_handle = vcpu
1671 .take_run_handle(Some(SIGRTMIN() + 0))
1672 .map_err(Error::RunnableVcpu)?;
1673
1674 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001675}
1676
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001677#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Zach Reizner304e7312020-09-29 16:00:24 -07001678fn inject_interrupt(irq_chip: &mut dyn IrqChipX86_64, vcpu: &dyn VcpuX86_64, vcpu_id: usize) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001679 if !irq_chip.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
1680 return;
1681 }
1682
1683 let vector = irq_chip
1684 .get_external_interrupt(vcpu_id)
1685 .unwrap_or_else(|e| {
1686 error!("get_external_interrupt failed on vcpu {}: {}", vcpu_id, e);
1687 None
1688 });
1689 if let Some(vector) = vector {
1690 if let Err(e) = vcpu.interrupt(vector as u32) {
1691 error!(
1692 "Failed to inject interrupt {} to vcpu {}: {}",
1693 vector, vcpu_id, e
1694 );
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001695 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001696 }
1697
1698 // The second interrupt request should be handled immediately, so ask vCPU to exit as soon as
1699 // possible.
1700 if irq_chip.interrupt_requested(vcpu_id) {
1701 vcpu.request_interrupt_window();
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001702 }
1703}
1704
// ARM/AArch64 counterpart of the x86 `inject_interrupt`: does nothing, so `run_vcpu` can
// call `inject_interrupt` on every architecture.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
fn inject_interrupt(_irq_chip: &mut dyn IrqChip, _vcpu: &dyn Vcpu, _vcpu_id: usize) {}
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001707
// Spawns the thread named "crosvm_vcpu<cpu_id>" that sets up one VCPU and runs it until it
// exits. Returns the thread's join handle, or `Error::SpawnVcpu` if the thread could not be
// spawned.
//
// Inside the thread: the vcpu is made runnable via `runnable_vcpu`, the thread waits on
// `start_barrier`, and then loops on `vcpu.run`, dispatching I/O port and MMIO exits to
// `io_bus` / `mmio_bus`. After a run interrupted by a signal, the thread consults
// `run_mode_arc` and waits there until the mode is `Running`, or returns if it is `Exiting`.
fn run_vcpu<V>(
    cpu_id: usize,
    vcpu: Option<V>,
    vm: impl VmArch + 'static,
    mut irq_chip: impl IrqChipArch + 'static,
    vcpu_count: usize,
    run_rt: bool,
    vcpu_affinity: Vec<usize>,
    no_smt: bool,
    start_barrier: Arc<Barrier>,
    has_bios: bool,
    io_bus: devices::Bus,
    mmio_bus: devices::Bus,
    exit_evt: Event,
    requires_pvclock_ctrl: bool,
    run_mode_arc: Arc<VcpuRunMode>,
    use_hypervisor_signals: bool,
) -> Result<JoinHandle<()>>
where
    V: VcpuArch + 'static,
{
    thread::Builder::new()
        .name(format!("crosvm_vcpu{}", cpu_id))
        .spawn(move || {
            // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
            // implementation accomplishes that.
            let _scoped_exit_evt = ScopedEvent::from(exit_evt);

            let runnable_vcpu = runnable_vcpu(
                cpu_id,
                vcpu,
                vm,
                &mut irq_chip,
                vcpu_count,
                run_rt,
                vcpu_affinity,
                no_smt,
                has_bios,
                use_hypervisor_signals,
            );

            // The barrier is waited on before the setup result is inspected, so this thread
            // reaches the barrier even when setup failed.
            start_barrier.wait();

            let (vcpu, vcpu_run_handle) = match runnable_vcpu {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to start vcpu {}: {}", cpu_id, e);
                    return;
                }
            };

            loop {
                let mut interrupted_by_signal = false;
                match vcpu.run(&vcpu_run_handle) {
                    Ok(VcpuExit::IoIn { port, mut size }) => {
                        // Reads are served from an 8-byte buffer; larger requests are logged
                        // and clamped to the buffer size.
                        let mut data = [0; 8];
                        if size > data.len() {
                            error!("unsupported IoIn size of {} bytes", size);
                            size = data.len();
                        }
                        io_bus.read(port as u64, &mut data[..size]);
                        if let Err(e) = vcpu.set_data(&data[..size]) {
                            error!("failed to set return data for IoIn: {}", e);
                        }
                    }
                    Ok(VcpuExit::IoOut {
                        port,
                        mut size,
                        data,
                    }) => {
                        // Clamp oversized writes to the length of the data provided.
                        if size > data.len() {
                            error!("unsupported IoOut size of {} bytes", size);
                            size = data.len();
                        }
                        io_bus.write(port as u64, &data[..size]);
                    }
                    Ok(VcpuExit::MmioRead { address, size }) => {
                        // NOTE(review): unlike IoIn, `size` is not clamped here, so a size
                        // above 8 would panic on the slice -- presumably the hypervisor
                        // never reports one; confirm.
                        let mut data = [0; 8];
                        mmio_bus.read(address, &mut data[..size]);
                        // Setting data for mmio can not fail.
                        let _ = vcpu.set_data(&data[..size]);
                    }
                    Ok(VcpuExit::MmioWrite {
                        address,
                        size,
                        data,
                    }) => {
                        mmio_bus.write(address, &data[..size]);
                    }
                    Ok(VcpuExit::IoapicEoi { vector }) => {
                        if let Err(e) = irq_chip.broadcast_eoi(vector) {
                            error!(
                                "failed to broadcast eoi {} on vcpu {}: {}",
                                vector, cpu_id, e
                            );
                        }
                    }
                    // These exits end the run loop, and with it the thread.
                    Ok(VcpuExit::Hlt) => break,
                    Ok(VcpuExit::Shutdown) => break,
                    Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason,
                    }) => {
                        error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
                        break;
                    }
                    Ok(VcpuExit::SystemEvent(_, _)) => break,
                    // Any other exit reason is logged and the vcpu is run again.
                    Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                    Err(e) => match e.errno() {
                        // EINTR triggers the run-mode check below; EAGAIN simply re-runs.
                        libc::EINTR => interrupted_by_signal = true,
                        libc::EAGAIN => {}
                        _ => {
                            error!("vcpu hit unknown error: {}", e);
                            break;
                        }
                    },
                }

                if interrupted_by_signal {
                    if use_hypervisor_signals {
                        // Try to clear the signal that we use to kick VCPU if it is pending before
                        // attempting to handle pause requests.
                        if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                            error!("failed to clear pending signal: {}", e);
                            break;
                        }
                    } else {
                        vcpu.set_immediate_exit(false);
                    }
                    let mut run_mode_lock = run_mode_arc.mtx.lock();
                    loop {
                        match *run_mode_lock {
                            VmRunMode::Running => break,
                            VmRunMode::Suspending => {
                                // On KVM implementations that use a paravirtualized clock (e.g.
                                // x86), a flag must be set to indicate to the guest kernel that a
                                // VCPU was suspended. The guest kernel will use this flag to
                                // prevent the soft lockup detection from triggering when this VCPU
                                // resumes, which could happen days later in realtime.
                                if requires_pvclock_ctrl {
                                    if let Err(e) = vcpu.pvclock_ctrl() {
                                        error!(
                                            "failed to tell hypervisor vcpu {} is suspending: {}",
                                            cpu_id, e
                                        );
                                    }
                                }
                            }
                            VmRunMode::Exiting => return,
                        }
                        // Give ownership of our exclusive lock to the condition variable that will
                        // block. When the condition variable is notified, `wait` will unblock and
                        // return a new exclusive lock.
                        run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
                    }
                }

                // Runs after every exit that did not leave the loop (a no-op on ARM).
                inject_interrupt(&mut irq_chip, &vcpu, cpu_id);
            }
        })
        .map_err(Error::SpawnVcpu)
}
1869
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of
// i64s.
//
// The whole file is read, so a file longer than a fixed buffer can neither drop trailing
// fields nor cut a number in half and yield a silently wrong value.
//
// Returns an error if the file cannot be opened or read, if its contents are not valid UTF-8
// (`InvalidData`), or if any of the fields fail to parse (`InvalidData`). An empty or
// all-whitespace file yields an empty Vec.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    let mut content = String::new();
    // `read_to_string` reports non-UTF-8 data as `InvalidData`.
    File::open(path)?.read_to_string(&mut content)?;

    // `split_whitespace` already skips leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1889
1890// Reads the contents of a file and converts them into a u64, and if there
1891// are multiple fields it only returns the first one.
Charles William Dick0bf8a552019-10-29 15:36:01 +09001892fn file_to_i64<P: AsRef<Path>>(path: P) -> io::Result<i64> {
1893 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07001894 .into_iter()
1895 .next()
1896 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001897}
1898
Steven Richmanf32d0b42020-06-20 21:45:32 -07001899fn create_kvm(mem: GuestMemory) -> base::Result<KvmVm> {
1900 let kvm = Kvm::new()?;
1901 let vm = KvmVm::new(&kvm, mem)?;
1902 Ok(vm)
1903}
1904
1905fn create_kvm_kernel_irq_chip(
1906 vm: &KvmVm,
1907 vcpu_count: usize,
1908 _ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07001909) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001910 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
1911 Ok(irq_chip)
1912}
1913
1914#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1915fn create_kvm_split_irq_chip(
1916 vm: &KvmVm,
1917 vcpu_count: usize,
1918 ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07001919) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001920 let irq_chip = KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_socket)?;
1921 Ok(irq_chip)
1922}
1923
Dylan Reid059a1882018-07-23 17:58:09 -07001924pub fn run_config(cfg: Config) -> Result<()> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001925 if cfg.split_irqchip {
1926 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1927 {
1928 unimplemented!("KVM split irqchip mode only supported on x86 processors")
1929 }
1930
1931 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1932 {
Zach Reizner304e7312020-09-29 16:00:24 -07001933 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm, create_kvm_split_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001934 }
1935 } else {
Zach Reizner304e7312020-09-29 16:00:24 -07001936 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm, create_kvm_kernel_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001937 }
1938}
1939
Zach Reizner304e7312020-09-29 16:00:24 -07001940fn run_vm<V, Vcpu, I, FV, FI>(cfg: Config, create_vm: FV, create_irq_chip: FI) -> Result<()>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001941where
1942 V: VmArch + 'static,
Zach Reizner304e7312020-09-29 16:00:24 -07001943 Vcpu: VcpuArch + 'static,
1944 I: IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001945 FV: FnOnce(GuestMemory) -> base::Result<V>,
1946 FI: FnOnce(
1947 &V,
1948 usize, // vcpu_count
1949 VmIrqRequestSocket, // ioapic_device_socket
1950 ) -> base::Result<I>,
1951{
Lepton Wu9105e9f2019-03-14 11:38:31 -07001952 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001953 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1954 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1955 // access to those files will not be possible.
1956 info!("crosvm entering multiprocess mode");
1957 }
1958
Jingkui Wang100e6e42019-03-08 20:41:57 -08001959 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001960 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001961 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1962 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1963 // quickly.
1964 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1965
David Tolnay2b089fc2019-03-04 15:33:22 -08001966 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1967 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001968 } else {
1969 None
1970 };
1971
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001972 let vm_image = match cfg.executable_path {
1973 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1974 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
1975 ),
1976 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1977 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
1978 ),
1979 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1980 };
1981
Dylan Reid059a1882018-07-23 17:58:09 -07001982 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08001983 memory_size: cfg
1984 .memory
1985 .unwrap_or(256)
1986 .checked_mul(1024 * 1024)
1987 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07001988 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001989 vcpu_affinity: cfg.vcpu_affinity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001990 no_smt: cfg.no_smt,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001991 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001992 android_fstab: cfg
1993 .android_fstab
1994 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001995 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001996 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09001997 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001998 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001999 extra_kernel_params: cfg.params.clone(),
2000 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07002001 acpi_sdts: cfg
2002 .acpi_tables
2003 .iter()
2004 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2005 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002006 rt_cpus: cfg.rt_cpus.clone(),
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002007 protected_vm: cfg.protected_vm,
Dylan Reid059a1882018-07-23 17:58:09 -07002008 };
2009
Zach Reiznera60744b2019-02-13 17:33:32 -08002010 let control_server_socket = match &cfg.socket_path {
2011 Some(path) => Some(UnlinkUnixSeqpacketListener(
2012 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
2013 )),
2014 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002015 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002016
2017 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07002018 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07002019 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2020 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07002021 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07002022 let (balloon_host_socket, balloon_device_socket) =
Charles William Dick664cc3c2020-01-10 14:31:52 +09002023 msg_socket::pair::<BalloonControlCommand, BalloonControlResult>()
2024 .map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002025
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002026 // Create one control socket per disk.
2027 let mut disk_device_sockets = Vec::new();
2028 let mut disk_host_sockets = Vec::new();
2029 let disk_count = cfg.disks.len();
2030 for _ in 0..disk_count {
2031 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07002032 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
2033 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002034 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07002035 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002036 }
2037
Daniel Verkampe1980a92020-02-07 11:00:55 -08002038 let mut pmem_device_sockets = Vec::new();
2039 let pmem_count = cfg.pmem_devices.len();
2040 for _ in 0..pmem_count {
2041 let (pmem_host_socket, pmem_device_socket) =
2042 msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
2043 pmem_device_sockets.push(pmem_device_socket);
2044 control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
2045 }
2046
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002047 let (gpu_host_socket, gpu_device_socket) =
2048 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2049 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
2050
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002051 let (ioapic_host_socket, ioapic_device_socket) =
2052 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
2053 control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket));
2054
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002055 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2056
Zach Reizner304e7312020-09-29 16:00:24 -07002057 let linux: RunnableLinuxVm<_, Vcpu, _> = Arch::build_vm(
Trent Begin17ccaad2019-04-17 13:51:25 -06002058 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06002059 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002060 simple_jail(&cfg, "serial")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07002061 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06002062 create_devices(
2063 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07002064 mem,
2065 vm,
2066 sys_allocator,
2067 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07002068 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002069 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002070 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06002071 balloon_device_socket,
2072 &mut disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08002073 &mut pmem_device_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002074 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002075 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06002076 )
2077 },
Steven Richmanf32d0b42020-06-20 21:45:32 -07002078 create_vm,
2079 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_socket),
Trent Begin17ccaad2019-04-17 13:51:25 -06002080 )
David Tolnaybe034262019-03-04 17:48:36 -08002081 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002082
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002083 run_control(
2084 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08002085 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002086 control_sockets,
2087 balloon_host_socket,
2088 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08002089 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002090 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002091 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002092 Arc::clone(&map_request),
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002093 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002094}
2095
Zach Reizner304e7312020-09-29 16:00:24 -07002096fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static, I: IrqChipArch + 'static>(
2097 mut linux: RunnableLinuxVm<V, Vcpu, I>,
Zach Reiznera60744b2019-02-13 17:33:32 -08002098 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002099 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07002100 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07002101 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08002102 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07002103 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002104 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002105 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reizner55a9e502018-10-03 10:22:32 -07002106) -> Result<()> {
David Tolnay5bbbf612018-12-01 17:49:30 -08002107 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002108
Zach Reizner5bed0d22018-03-28 02:31:11 -07002109 #[derive(PollToken)]
2110 enum Token {
2111 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002112 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002113 ChildSignal,
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002114 IrqFd { gsi: usize },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002115 BalanceMemory,
2116 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002117 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002118 VmControl { index: usize },
2119 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002120
Zach Reizner19ad1f32019-12-12 18:58:50 -08002121 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002122 .set_raw_mode()
2123 .expect("failed to set terminal raw mode");
2124
Michael Hoylee392c462020-10-07 03:29:24 -07002125 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerb2110be2019-07-23 15:55:03 -07002126 (&linux.exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002127 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002128 (&sigchld_fd, Token::ChildSignal),
2129 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002130 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002131
Zach Reiznera60744b2019-02-13 17:33:32 -08002132 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002133 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002134 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002135 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002136 }
Dylan Reid059a1882018-07-23 17:58:09 -07002137 for (index, socket) in control_sockets.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002138 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002139 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002140 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002141 }
2142
Steven Richmanf32d0b42020-06-20 21:45:32 -07002143 let events = linux
2144 .irq_chip
2145 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002146 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002147
2148 for (gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002149 wait_ctx
Steven Richmanf32d0b42020-06-20 21:45:32 -07002150 .add(&evt, Token::IrqFd { gsi: gsi as usize })
Michael Hoylee392c462020-10-07 03:29:24 -07002151 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002152 }
2153
Charles William Dick0bf8a552019-10-29 15:36:01 +09002154 // Balance available memory between guest and host every second.
Michael Hoyle08d86a42020-08-19 14:45:21 -07002155 let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002156 if Path::new(LOWMEM_AVAILABLE).exists() {
2157 // Create timer request balloon stats every 1s.
Michael Hoylee392c462020-10-07 03:29:24 -07002158 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002159 .add(&balancemem_timer, Token::BalanceMemory)
Michael Hoylee392c462020-10-07 03:29:24 -07002160 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002161 let balancemem_dur = Duration::from_secs(1);
2162 let balancemem_int = Duration::from_secs(1);
2163 balancemem_timer
2164 .reset(balancemem_dur, Some(balancemem_int))
Michael Hoyle08d86a42020-08-19 14:45:21 -07002165 .map_err(Error::ResetTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002166
2167 // Listen for balloon statistics from the guest so we can balance.
Michael Hoylee392c462020-10-07 03:29:24 -07002168 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002169 .add(&balloon_host_socket, Token::BalloonResult)
Michael Hoylee392c462020-10-07 03:29:24 -07002170 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002171 } else {
2172 warn!("Unable to open low mem available, maybe not a chrome os kernel");
2173 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002174
Lepton Wu20333e42019-03-14 10:48:03 -07002175 if sandbox {
2176 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2177 drop_capabilities().map_err(Error::DropCapabilities)?;
2178 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002179
Steven Richmanf32d0b42020-06-20 21:45:32 -07002180 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2181 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002182 let run_mode_arc = Arc::new(VcpuRunMode::default());
Steven Richmanf32d0b42020-06-20 21:45:32 -07002183 let use_hypervisor_signals = !linux
2184 .vm
2185 .get_hypervisor()
2186 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002187 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002188
Zach Reizner304e7312020-09-29 16:00:24 -07002189 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002190 Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
2191 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2192 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002193 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Daniel Verkampc677fb42020-09-08 13:47:49 -07002194 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2195 Some(VcpuAffinity::Global(v)) => v,
2196 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2197 None => Default::default(),
2198 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002199 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002200 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002201 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002202 linux.vm.try_clone().map_err(Error::CloneEvent)?,
2203 linux.irq_chip.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002204 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002205 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002206 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002207 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002208 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002209 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002210 linux.io_bus.clone(),
2211 linux.mmio_bus.clone(),
Michael Hoyle685316f2020-09-16 15:29:20 -07002212 linux.exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002213 linux.vm.check_capability(VmCap::PvClockSuspend),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002214 run_mode_arc.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002215 use_hypervisor_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07002216 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07002217 vcpu_handles.push(handle);
2218 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002219
Dylan Reid059a1882018-07-23 17:58:09 -07002220 vcpu_thread_barrier.wait();
2221
Michael Hoylee392c462020-10-07 03:29:24 -07002222 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002223 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07002224 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002225 Ok(v) => v,
2226 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002227 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002228 break;
2229 }
2230 }
2231 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002232
Steven Richmanf32d0b42020-06-20 21:45:32 -07002233 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2234 warn!("can't deliver delayed irqs: {}", e);
2235 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002236
Zach Reiznera60744b2019-02-13 17:33:32 -08002237 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07002238 for event in events.iter().filter(|e| e.is_readable) {
2239 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002240 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002241 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07002242 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002243 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002244 Token::Suspend => {
2245 info!("VM requested suspend");
2246 linux.suspend_evt.read().unwrap();
2247 run_mode_arc.set_and_notify(VmRunMode::Suspending);
2248 for handle in &vcpu_handles {
2249 let _ = handle.kill(SIGRTMIN() + 0);
2250 }
2251 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002252 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002253 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002254 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002255 let pid = siginfo.ssi_pid;
2256 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2257 Some(label) => format!("{} (pid {})", label, pid),
2258 None => format!("pid {}", pid),
2259 };
David Tolnayf5032762018-12-03 10:46:45 -08002260 error!(
2261 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002262 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002263 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002264 }
Michael Hoylee392c462020-10-07 03:29:24 -07002265 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002266 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002267 Token::IrqFd { gsi } => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002268 if let Err(e) = linux.irq_chip.service_irq_event(gsi as u32) {
2269 error!("failed to signal irq {}: {}", gsi, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002270 }
2271 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002272 Token::BalanceMemory => {
Michael Hoyle08d86a42020-08-19 14:45:21 -07002273 balancemem_timer.wait().map_err(Error::Timer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002274 let command = BalloonControlCommand::Stats {};
2275 if let Err(e) = balloon_host_socket.send(&command) {
2276 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002277 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002278 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002279 Token::BalloonResult => {
2280 match balloon_host_socket.recv() {
2281 Ok(BalloonControlResult::Stats {
2282 stats,
2283 balloon_actual: balloon_actual_u,
2284 }) => {
2285 // Available memory is reported in MB, and we need bytes.
2286 let host_available = file_to_i64(LOWMEM_AVAILABLE)
2287 .map_err(Error::ReadMemAvailable)?
2288 << 20;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002289 let guest_free_u = if let Some(free) = stats.free_memory {
2290 free
Charles William Dick0bf8a552019-10-29 15:36:01 +09002291 } else {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002292 warn!("guest free_memory stat is missing");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002293 continue;
2294 };
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002295 let guest_cached_u = if let Some(cached) = stats.disk_caches {
2296 cached
2297 } else {
2298 warn!("guest disk_caches stat is missing");
2299 continue;
2300 };
2301 if guest_free_u > i64::max_value() as u64 {
2302 warn!("guest free memory is too large");
2303 continue;
2304 }
2305 if guest_cached_u > i64::max_value() as u64 {
2306 warn!("guest cached memory is too large");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002307 continue;
2308 }
2309 if balloon_actual_u > i64::max_value() as u64 {
2310 warn!("actual balloon size is too large");
2311 continue;
2312 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002313 // Tell the guest to change the balloon size if the target balloon size
2314 // is more than 5% different from the current balloon size.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002315 const RESIZE_PERCENT: i64 = 5;
2316 let balloon_actual = balloon_actual_u as i64;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002317 let guest_free = guest_free_u as i64;
2318 let guest_cached = guest_cached_u as i64;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002319 // Compute how much memory the guest should have available after we
2320 // rebalance.
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002321 let guest_available_target = host_available;
2322 let guest_available_delta =
2323 guest_available_target - guest_free - guest_cached;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002324 // How much do we have to change the balloon to balance.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002325 let balloon_target = max(balloon_actual - guest_available_delta, 0);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002326 // Compute the change in balloon size in percent. If the balloon size
2327 // is 0, use 1 so we don't overflow from the infinity % increase.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002328 let balloon_change_percent = (balloon_actual - balloon_target).abs()
2329 * 100
2330 / max(balloon_actual, 1);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002331
Charles William Dick0bf8a552019-10-29 15:36:01 +09002332 if balloon_change_percent >= RESIZE_PERCENT {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002333 info!("resizing balloon: host avail {}, guest free {} cached {} (target {}), balloon actual {} (target {})",
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002334 host_available,
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002335 guest_free,
2336 guest_cached,
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002337 guest_available_target,
2338 balloon_actual,
2339 balloon_target,
2340 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09002341 let command = BalloonControlCommand::Adjust {
2342 num_bytes: balloon_target as u64,
2343 };
2344 if let Err(e) = balloon_host_socket.send(&command) {
2345 warn!("failed to send memory value to balloon device: {}", e);
2346 }
2347 }
2348 }
2349 Err(e) => {
2350 error!("failed to recv BalloonControlResult: {}", e);
2351 }
2352 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002353 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002354 Token::VmControlServer => {
2355 if let Some(socket_server) = &control_server_socket {
2356 match socket_server.accept() {
2357 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07002358 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002359 .add(
2360 &socket,
2361 Token::VmControl {
2362 index: control_sockets.len(),
2363 },
2364 )
Michael Hoylee392c462020-10-07 03:29:24 -07002365 .map_err(Error::WaitContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002366 control_sockets
2367 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002368 }
2369 Err(e) => error!("failed to accept socket: {}", e),
2370 }
2371 }
2372 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002373 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08002374 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002375 match socket {
2376 TaggedControlSocket::Vm(socket) => match socket.recv() {
2377 Ok(request) => {
2378 let mut run_mode_opt = None;
2379 let response = request.execute(
2380 &mut run_mode_opt,
2381 &balloon_host_socket,
2382 disk_host_sockets,
2383 &usb_control_socket,
2384 );
2385 if let Err(e) = socket.send(&response) {
2386 error!("failed to send VmResponse: {}", e);
2387 }
2388 if let Some(run_mode) = run_mode_opt {
2389 info!("control socket changed run mode to {}", run_mode);
2390 match run_mode {
2391 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07002392 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002393 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002394 VmRunMode::Running => {
2395 if let VmRunMode::Suspending =
2396 *run_mode_arc.mtx.lock()
2397 {
2398 linux.io_bus.notify_resume();
2399 }
2400 run_mode_arc.set_and_notify(VmRunMode::Running);
2401 for handle in &vcpu_handles {
2402 let _ = handle.kill(SIGRTMIN() + 0);
2403 }
2404 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002405 other => {
2406 run_mode_arc.set_and_notify(other);
2407 for handle in &vcpu_handles {
2408 let _ = handle.kill(SIGRTMIN() + 0);
2409 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002410 }
2411 }
2412 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002413 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002414 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002415 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002416 vm_control_indices_to_remove.push(index);
2417 } else {
2418 error!("failed to recv VmRequest: {}", e);
2419 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002420 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002421 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07002422 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002423 Ok(request) => {
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002424 let response = request.execute(
2425 &mut linux.vm,
2426 &mut linux.resources,
2427 Arc::clone(&map_request),
2428 );
Jakub Starond99cd0a2019-04-11 14:09:39 -07002429 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002430 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002431 }
2432 }
2433 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002434 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002435 vm_control_indices_to_remove.push(index);
2436 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002437 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002438 }
2439 }
2440 },
Xiong Zhang2515b752019-09-19 10:29:02 +08002441 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
2442 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002443 let response = {
2444 let irq_chip = &mut linux.irq_chip;
2445 request.execute(
2446 |setup| match setup {
2447 IrqSetup::Event(irq, ev) => {
2448 irq_chip.register_irq_event(irq, ev, None)
2449 }
2450 IrqSetup::Route(route) => irq_chip.route_irq(route),
2451 },
2452 &mut linux.resources,
2453 )
2454 };
Xiong Zhang2515b752019-09-19 10:29:02 +08002455 if let Err(e) = socket.send(&response) {
2456 error!("failed to send VmIrqResponse: {}", e);
2457 }
2458 }
2459 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002460 if let MsgError::RecvZero = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002461 vm_control_indices_to_remove.push(index);
2462 } else {
2463 error!("failed to recv VmIrqRequest: {}", e);
2464 }
2465 }
2466 },
Daniel Verkampe1980a92020-02-07 11:00:55 -08002467 TaggedControlSocket::VmMsync(socket) => match socket.recv() {
2468 Ok(request) => {
2469 let response = request.execute(&mut linux.vm);
2470 if let Err(e) = socket.send(&response) {
2471 error!("failed to send VmMsyncResponse: {}", e);
2472 }
2473 }
2474 Err(e) => {
2475 if let MsgError::BadRecvSize { actual: 0, .. } = e {
2476 vm_control_indices_to_remove.push(index);
2477 } else {
2478 error!("failed to recv VmMsyncRequest: {}", e);
2479 }
2480 }
2481 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002482 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002483 }
2484 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002485 }
2486 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002487
Michael Hoylee392c462020-10-07 03:29:24 -07002488 for event in events.iter().filter(|e| e.is_hungup) {
2489 match event.token {
Zach Reiznera60744b2019-02-13 17:33:32 -08002490 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002491 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002492 Token::ChildSignal => {}
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002493 Token::IrqFd { gsi: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09002494 Token::BalanceMemory => {}
2495 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002496 Token::VmControlServer => {}
2497 Token::VmControl { index } => {
2498 // It's possible more data is readable and buffered while the socket is hungup,
2499 // so don't delete the socket from the poll context until we're sure all the
2500 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07002501 match control_sockets
2502 .get(index)
2503 .map(|s| s.as_ref().get_readable_bytes())
2504 {
Zach Reiznera60744b2019-02-13 17:33:32 -08002505 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
2506 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
2507 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07002508 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002509 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002510 }
2511 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002512
2513 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002514 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002515 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002516 vm_control_indices_to_remove.dedup();
2517 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07002518 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2519 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002520 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07002521 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08002522 // that has already been closed. Because the token associated with that spurious event
2523 // now belongs to a different socket, the control loop will start to interact with
2524 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2525 // blocking on a socket that will never be ready. See also: crbug.com/1019986
2526 if let Some(socket) = control_sockets.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002527 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08002528 }
2529
2530 // This line implicitly drops the socket at `index` when it gets returned by
2531 // `swap_remove`. After this line, the socket at `index` is not the one from
2532 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07002533 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08002534 control_sockets.swap_remove(index);
2535 if let Some(socket) = control_sockets.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002536 wait_ctx
2537 .modify(socket, EventType::Read, Token::VmControl { index })
2538 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002539 }
2540 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002541 }
2542
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002543 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
2544 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07002545 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08002546 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002547 Ok(_) => {
2548 if let Err(e) = handle.join() {
2549 error!("failed to join vcpu thread: {:?}", e);
2550 }
2551 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08002552 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08002553 }
2554 }
2555
Daniel Verkamp94c35272019-09-12 13:31:30 -07002556 // Explicitly drop the VM structure here to allow the devices to clean up before the
2557 // control sockets are closed when this function exits.
2558 mem::drop(linux);
2559
Zach Reizner19ad1f32019-12-12 18:58:50 -08002560 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002561 .set_canon_mode()
2562 .expect("failed to restore canonical mode for terminal");
2563
2564 Ok(())
2565}