// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07005use std::cmp::{max, Reverse};
Jakub Starona3411ea2019-04-24 10:55:25 -07006use std::convert::TryFrom;
John Batesb220eac2020-09-14 17:03:02 -07007#[cfg(feature = "gpu")]
8use std::env;
David Tolnayfdac5ed2019-03-08 16:56:14 -08009use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080011use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070012use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070013use std::io::{self, stdin, Read};
Steven Richmanf32d0b42020-06-20 21:45:32 -070014use std::iter;
Daniel Verkamp94c35272019-09-12 13:31:30 -070015use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080016use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070017#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070018use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090019use std::num::ParseIntError;
Jakub Starond99cd0a2019-04-11 14:09:39 -070020use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080021use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090023use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070024use std::str;
Dylan Reidb0492662019-05-17 14:50:13 -070025use std::sync::{mpsc, Arc, Barrier};
26
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use std::thread;
28use std::thread::JoinHandle;
Charles William Dick0bf8a552019-10-29 15:36:01 +090029use std::time::Duration;
Zach Reizner39aa26b2017-12-12 18:03:23 -080030
David Tolnay41a6f842019-03-01 16:18:44 -080031use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080032
Tomasz Jeznach42644642020-05-20 23:27:59 -070033use acpi_tables::sdt::SDT;
34
Michael Hoyle6b196952020-08-02 20:09:41 -070035use base::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reizner65b98f12019-11-22 17:34:58 -080036#[cfg(feature = "gpu")]
37use devices::virtio::EventDevice;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070038use devices::virtio::{self, Console, VirtioDevice};
paulhsiace17e6e2020-08-28 18:37:45 +080039#[cfg(feature = "audio")]
40use devices::Ac97Dev;
Xiong Zhang17b0daf2019-04-23 17:14:50 +080041use devices::{
Colin Downs-Razoukbd532762020-09-08 15:49:35 -070042 self, HostBackendDeviceProvider, IrqEventIndex, KvmKernelIrqChip, PciDevice, VfioContainer,
43 VfioDevice, VfioPciDevice, VirtioPciDevice, XhciController,
Xiong Zhang17b0daf2019-04-23 17:14:50 +080044};
Steven Richmanf32d0b42020-06-20 21:45:32 -070045use hypervisor::kvm::{Kvm, KvmVcpu, KvmVm};
Zach Reizner304e7312020-09-29 16:00:24 -070046use hypervisor::{HypervisorCap, Vcpu, VcpuExit, VcpuRunHandle, Vm, VmCap};
Allen Webbf3024c82020-06-19 07:19:48 -070047use minijail::{self, Minijail};
Zach Reiznera60744b2019-02-13 17:33:32 -080048use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080049use net_util::{Error as NetError, MacAddress, Tap};
David Tolnay3df35522019-03-11 12:36:30 -070050use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080051use resources::{Alloc, MmioType, SystemAllocator};
Dylan Reidb0492662019-05-17 14:50:13 -070052use sync::Mutex;
Jakub Starona3411ea2019-04-24 10:55:25 -070053
Michael Hoyle6b196952020-08-02 20:09:41 -070054use base::{
David Tolnay633426a2019-04-12 12:18:35 -070055 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060056 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
Michael Hoylee392c462020-10-07 03:29:24 -070057 set_cpu_affinity, set_rt_prio_limit, set_rt_round_robin, signal, validate_raw_fd, warn,
58 AsRawDescriptor, Event, EventType, ExternalMapping, FlockOperation, Killable,
59 MemoryMappingArena, PollToken, Protection, RawDescriptor, ScopedEvent, SignalFd, Terminal,
60 Timer, WaitContext, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080061};
Jakub Starone7c59052019-04-09 12:31:14 -070062use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070063 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Charles William Dick664cc3c2020-01-10 14:31:52 +090064 BalloonControlResult, DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket,
Steven Richmanf32d0b42020-06-20 21:45:32 -070065 DiskControlResult, IrqSetup, UsbControlSocket, VmControlResponseSocket, VmIrqRequest,
66 VmIrqRequestSocket, VmIrqResponse, VmIrqResponseSocket, VmMemoryControlRequestSocket,
67 VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse, VmMsyncRequest,
68 VmMsyncRequestSocket, VmMsyncResponse, VmMsyncResponseSocket, VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070069};
Dylan Reidec058d62020-07-20 20:21:11 -070070use vm_memory::{GuestAddress, GuestMemory};
Zach Reizner39aa26b2017-12-12 18:03:23 -080071
Daniel Verkamp50740ce2020-02-28 12:36:56 -080072use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070073use arch::{
Daniel Verkampc677fb42020-09-08 13:47:49 -070074 self, LinuxArch, RunnableLinuxVm, SerialHardware, SerialParameters, VcpuAffinity,
75 VirtioDeviceStub, VmComponents, VmImage,
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -070076};
Sonny Raoed517d12018-02-13 22:09:43 -080077
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080078#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070079use {
80 aarch64::AArch64 as Arch,
81 devices::{IrqChip, IrqChipAArch64 as IrqChipArch},
82 hypervisor::{VcpuAArch64 as VcpuArch, VmAArch64 as VmArch},
83};
Zach Reizner55a9e502018-10-03 10:22:32 -070084#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Steven Richmanf32d0b42020-06-20 21:45:32 -070085use {
86 devices::{IrqChipX86_64, IrqChipX86_64 as IrqChipArch, KvmSplitIrqChip},
87 hypervisor::{VcpuX86_64, VcpuX86_64 as VcpuArch, VmX86_64 as VmArch},
88 x86_64::X8664arch as Arch,
89};
Zach Reizner39aa26b2017-12-12 18:03:23 -080090
David Tolnay3df35522019-03-11 12:36:30 -070091#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070092#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080093pub enum Error {
Michael Hoyle6b196952020-08-02 20:09:41 -070094 AddGpuDeviceMemory(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -070095 AddIrqChipVcpu(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -070096 AddPmemDeviceMemory(base::Error),
Lepton Wu60893882018-11-21 11:06:18 -080097 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070098 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080099 BalloonDeviceNew(virtio::BalloonError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700100 BlockDeviceNew(base::Error),
101 BlockSignal(base::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -0800102 BuildVm(<Arch as LinuxArch>::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700103 ChownTpmStorage(base::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700104 CloneEvent(base::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700105 CloneVcpu(base::Error),
106 ConfigureVcpu(<Arch as LinuxArch>::Error),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000107 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800108 CreateAc97(devices::PciDeviceError),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700109 CreateConsole(arch::serial::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700110 CreateDiskError(disk::Error),
Michael Hoyle685316f2020-09-16 15:29:20 -0700111 CreateEvent(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700112 CreateSignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800113 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -0800114 CreateTapDevice(NetError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700115 CreateTimer(base::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800116 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -0800117 CreateUsbProvider(devices::usb::host_backend::error::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700118 CreateVcpu(base::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800119 CreateVfioDevice(devices::vfio::VfioError),
Michael Hoylee392c462020-10-07 03:29:24 -0700120 CreateWaitContext(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700121 DeviceJail(minijail::Error),
122 DevicePivotRoot(minijail::Error),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800123 Disk(PathBuf, io::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700124 DiskImageLock(base::Error),
125 DropCapabilities(base::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900126 FsDeviceNew(virtio::fs::Error),
127 GetMaxOpenFiles(io::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700128 GetSignalMask(signal::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800129 InputDeviceNew(virtio::InputError),
130 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700131 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700132 InvalidWaylandPath,
Allen Webbf3024c82020-06-19 07:19:48 -0700133 IoJail(minijail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800134 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800135 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800136 NetDeviceNew(virtio::NetError),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700137 OpenAcpiTable(PathBuf, io::Error),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800138 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700139 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800140 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800141 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800142 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800143 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900144 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800145 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700146 PmemDeviceImageTooBig,
Michael Hoyle6b196952020-08-02 20:09:41 -0700147 PmemDeviceNew(base::Error),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900148 ReadMemAvailable(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700149 RegisterBalloon(arch::DeviceRegistrationError),
150 RegisterBlock(arch::DeviceRegistrationError),
151 RegisterGpu(arch::DeviceRegistrationError),
152 RegisterNet(arch::DeviceRegistrationError),
153 RegisterP9(arch::DeviceRegistrationError),
154 RegisterRng(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700155 RegisterSignalHandler(base::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700156 RegisterWayland(arch::DeviceRegistrationError),
Michael Hoyle6b196952020-08-02 20:09:41 -0700157 ReserveGpuMemory(base::MmapError),
158 ReserveMemory(base::Error),
159 ReservePmemMemory(base::MmapError),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700160 ResetTimer(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800161 RngDeviceNew(virtio::RngError),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700162 RunnableVcpu(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700163 SettingGidMap(minijail::Error),
164 SettingMaxOpenFiles(minijail::Error),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700165 SettingSignalMask(base::Error),
Allen Webbf3024c82020-06-19 07:19:48 -0700166 SettingUidMap(minijail::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700167 SignalFd(base::SignalFdError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800168 SpawnVcpu(io::Error),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700169 Timer(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700170 ValidateRawFd(base::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800171 VhostNetDeviceNew(virtio::vhost::Error),
172 VhostVsockDeviceNew(virtio::vhost::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700173 VirtioPciDev(base::Error),
Michael Hoylee392c462020-10-07 03:29:24 -0700174 WaitContextAdd(base::Error),
175 WaitContextDelete(base::Error),
Michael Hoyle6b196952020-08-02 20:09:41 -0700176 WaylandDeviceNew(base::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800177}
178
David Tolnayc69f9752019-03-01 18:07:56 -0800179impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700180 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800181 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800182 use self::Error::*;
183
David Tolnay3df35522019-03-11 12:36:30 -0700184 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800185 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800186 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700187 AddIrqChipVcpu(e) => write!(f, "failed to add vcpu to irq chip: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700188 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800189 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700190 AllocatePmemDeviceAddress(e) => {
191 write!(f, "failed to allocate memory for pmem device: {}", e)
192 }
David Tolnayc69f9752019-03-01 18:07:56 -0800193 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
194 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
195 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800196 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800197 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700198 CloneEvent(e) => write!(f, "failed to clone event: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700199 CloneVcpu(e) => write!(f, "failed to clone vcpu: {}", e),
200 ConfigureVcpu(e) => write!(f, "failed to configure vcpu: {}", e),
Andrew Scull1590e6f2020-03-18 18:00:47 +0000201 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +0800202 CreateAc97(e) => write!(f, "failed to create ac97 device: {}", e),
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -0700203 CreateConsole(e) => write!(f, "failed to create console device: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700204 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
Michael Hoyle685316f2020-09-16 15:29:20 -0700205 CreateEvent(e) => write!(f, "failed to create event: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800206 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
207 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
208 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700209 CreateTimer(e) => write!(f, "failed to create Timer: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800210 CreateTpmStorage(p, e) => {
211 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
212 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800213 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700214 CreateVcpu(e) => write!(f, "failed to create vcpu: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800215 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700216 CreateWaitContext(e) => write!(f, "failed to create wait context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800217 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
218 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800219 Disk(p, e) => write!(f, "failed to load disk image {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800220 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800221 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900222 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
223 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700224 GetSignalMask(e) => write!(f, "failed to retrieve signal mask for vcpu: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700225 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
226 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800227 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
228 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800229 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800230 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800231 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800232 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
Tomasz Jeznach42644642020-05-20 23:27:59 -0700233 OpenAcpiTable(p, e) => write!(f, "failed to open ACPI file {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800234 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800235 f,
236 "failed to open android fstab file {}: {}",
237 p.display(),
238 e
239 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700240 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700241 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
242 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800243 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800244 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900245 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800246 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700247 PmemDeviceImageTooBig => {
248 write!(f, "failed to create pmem device: pmem device image too big")
249 }
250 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
Charles William Dick0bf8a552019-10-29 15:36:01 +0900251 ReadMemAvailable(e) => write!(f, "failed to read /proc/meminfo: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800252 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
253 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
254 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
255 RegisterNet(e) => write!(f, "error registering net device: {}", e),
256 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
257 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
258 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
259 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800260 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
261 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700262 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700263 ResetTimer(e) => write!(f, "failed to reset Timer: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800264 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700265 RunnableVcpu(e) => write!(f, "failed to set thread id for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800266 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900267 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
Steven Richmanf32d0b42020-06-20 21:45:32 -0700268 SettingSignalMask(e) => write!(f, "failed to set the signal mask for vcpu: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800269 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
270 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
271 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
Michael Hoyle08d86a42020-08-19 14:45:21 -0700272 Timer(e) => write!(f, "failed to read timer fd: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800273 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
274 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
275 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
276 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
Michael Hoylee392c462020-10-07 03:29:24 -0700277 WaitContextAdd(e) => write!(f, "failed to add descriptor to wait context: {}", e),
278 WaitContextDelete(e) => {
279 write!(f, "failed to remove descriptor from wait context: {}", e)
280 }
David Tolnayc69f9752019-03-01 18:07:56 -0800281 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800282 }
283 }
284}
285
Allen Webbf3024c82020-06-19 07:19:48 -0700286impl From<minijail::Error> for Error {
287 fn from(err: minijail::Error) -> Self {
David Tolnayfd0971d2019-03-04 17:15:57 -0800288 Error::IoJail(err)
289 }
290}
291
David Tolnayc69f9752019-03-01 18:07:56 -0800292impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700293
Zach Reizner39aa26b2017-12-12 18:03:23 -0800294type Result<T> = std::result::Result<T, Error>;
295
Jakub Starond99cd0a2019-04-11 14:09:39 -0700296enum TaggedControlSocket {
297 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700298 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800299 VmIrq(VmIrqResponseSocket),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800300 VmMsync(VmMsyncResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700301}
302
303impl AsRef<UnixSeqpacket> for TaggedControlSocket {
304 fn as_ref(&self) -> &UnixSeqpacket {
305 use self::TaggedControlSocket::*;
306 match &self {
Chirantan Ekbote50582532020-01-16 16:49:14 +0900307 Vm(ref socket) => socket.as_ref(),
308 VmMemory(ref socket) => socket.as_ref(),
309 VmIrq(ref socket) => socket.as_ref(),
Daniel Verkampe1980a92020-02-07 11:00:55 -0800310 VmMsync(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700311 }
312 }
313}
314
Michael Hoylee392c462020-10-07 03:29:24 -0700315impl AsRawDescriptor for TaggedControlSocket {
316 fn as_raw_descriptor(&self) -> RawDescriptor {
Jakub Starond99cd0a2019-04-11 14:09:39 -0700317 self.as_ref().as_raw_fd()
318 }
319}
320
Andrew Walbranf50bab62020-07-07 13:22:53 +0100321fn get_max_open_files() -> Result<u64> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900322 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900323
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900324 // Safe because this will only modify `buf` and we check the return value.
325 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
326 if res == 0 {
327 // Safe because the kernel guarantees that the struct is fully initialized.
328 let limit = unsafe { buf.assume_init() };
329 Ok(limit.rlim_max)
330 } else {
331 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
332 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900333}
334
/// Sandbox settings consumed by `create_base_minijail` when building a device jail.
struct SandboxConfig<'a> {
    /// When true, the jail is configured with an empty capability set.
    limit_caps: bool,
    /// When true, seccomp failures are logged and the `.policy` file is used even if a
    /// pre-compiled `.bpf` exists.
    log_failures: bool,
    /// Path of the seccomp policy without extension; `.bpf` or `.policy` is appended when the
    /// policy is loaded.
    seccomp_policy: &'a Path,
    /// Optional UID map string passed to the jail.
    uid_map: Option<&'a str>,
    /// Optional GID map string passed to the jail.
    gid_map: Option<&'a str>,
}
342
Zach Reizner44863792019-06-26 14:22:08 -0700343fn create_base_minijail(
344 root: &Path,
Matt Delcoc24ad782020-02-14 13:24:36 -0800345 r_limit: Option<u64>,
346 config: Option<&SandboxConfig>,
Zach Reizner44863792019-06-26 14:22:08 -0700347) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800348 // All child jails run in a new user namespace without any users mapped,
349 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800350 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800351
352 if let Some(config) = config {
353 j.namespace_pids();
354 j.namespace_user();
355 j.namespace_user_disable_setgroups();
356 if config.limit_caps {
357 // Don't need any capabilities.
358 j.use_caps(0);
359 }
360 if let Some(uid_map) = config.uid_map {
361 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
362 }
363 if let Some(gid_map) = config.gid_map {
364 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
365 }
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900366 // Run in a new mount namespace.
367 j.namespace_vfs();
368
Matt Delcoc24ad782020-02-14 13:24:36 -0800369 // Run in an empty network namespace.
370 j.namespace_net();
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900371
372 // Don't allow the device to gain new privileges.
Matt Delcoc24ad782020-02-14 13:24:36 -0800373 j.no_new_privs();
374
375 // By default we'll prioritize using the pre-compiled .bpf over the .policy
376 // file (the .bpf is expected to be compiled using "trap" as the failure
377 // behavior instead of the default "kill" behavior).
378 // Refer to the code comment for the "seccomp-log-failures"
379 // command-line parameter for an explanation about why the |log_failures|
380 // flag forces the use of .policy files (and the build-time alternative to
381 // this run-time flag).
382 let bpf_policy_file = config.seccomp_policy.with_extension("bpf");
383 if bpf_policy_file.exists() && !config.log_failures {
384 j.parse_seccomp_program(&bpf_policy_file)
385 .map_err(Error::DeviceJail)?;
386 } else {
387 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
388 // which will correctly kill the entire device process if a worker
389 // thread commits a seccomp violation.
390 j.set_seccomp_filter_tsync();
391 if config.log_failures {
392 j.log_seccomp_filter_failures();
393 }
394 j.parse_seccomp_filters(&config.seccomp_policy.with_extension("policy"))
395 .map_err(Error::DeviceJail)?;
396 }
397 j.use_seccomp_filter();
398 // Don't do init setup.
399 j.run_as_init();
400 }
401
Chirantan Ekbotef84c2292020-02-21 16:37:27 +0900402 // Only pivot_root if we are not re-using the current root directory.
403 if root != Path::new("/") {
404 // It's safe to call `namespace_vfs` multiple times.
405 j.namespace_vfs();
406 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
407 }
Matt Delco45caf912019-11-13 08:11:09 -0800408
Matt Delcoc24ad782020-02-14 13:24:36 -0800409 // Most devices don't need to open many fds.
410 let limit = if let Some(r) = r_limit { r } else { 1024u64 };
411 j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)
412 .map_err(Error::SettingMaxOpenFiles)?;
413
Zach Reizner39aa26b2017-12-12 18:03:23 -0800414 Ok(j)
415}
416
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800417fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700418 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800419 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
420 // A directory for a jailed device's pivot root.
421 let root_path = Path::new(pivot_root);
422 if !root_path.exists() {
423 return Err(Error::PivotRootDoesntExist(pivot_root));
424 }
425 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Matt Delcoc24ad782020-02-14 13:24:36 -0800426 let config = SandboxConfig {
427 limit_caps: true,
428 log_failures: cfg.seccomp_log_failures,
429 seccomp_policy: &policy_path,
430 uid_map: None,
431 gid_map: None,
432 };
433 Ok(Some(create_base_minijail(root_path, None, Some(&config))?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800434 } else {
435 Ok(None)
436 }
437}
438
David Tolnayfd0971d2019-03-04 17:15:57 -0800439type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800440
441fn create_block_device(
442 cfg: &Config,
443 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700444 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800445) -> DeviceResult {
446 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
447 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
448 // Safe because we will validate |raw_fd|.
449 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
450 } else {
451 OpenOptions::new()
452 .read(true)
453 .write(!disk.read_only)
454 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -0800455 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?
David Tolnay2b089fc2019-03-04 15:33:22 -0800456 };
457 // Lock the disk image to prevent other crosvm instances from using it.
458 let lock_op = if disk.read_only {
459 FlockOperation::LockShared
460 } else {
461 FlockOperation::LockExclusive
462 };
463 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
464
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700465 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800466 let dev = virtio::Block::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100467 virtio::base_features(cfg.protected_vm),
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800468 disk_file,
469 disk.read_only,
470 disk.sparse,
Daniel Verkamp27672232019-12-06 17:26:55 +1100471 disk.block_size,
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800472 Some(disk_device_socket),
473 )
474 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800475
476 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700477 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800478 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800479 })
480}
481
482fn create_rng_device(cfg: &Config) -> DeviceResult {
483 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
484
485 Ok(VirtioDeviceStub {
486 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800487 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800488 })
489}
490
/// Creates a virtio TPM device and prepares its storage directory.
///
/// With a jail, a 20 KB tmpfs is mounted at the jail's `/`, the crosvm user is added to the
/// jail, and a per-process directory `/run/vm/tpm.<pid>` is created, chowned to that user and
/// bind-mounted into the jail. Without a jail, `/tmp/tpm-simulator` is used as storage.
///
/// # Errors
///
/// Returns `CreateTpmStorage` if the storage directory cannot be created, `ChownTpmStorage` if
/// it cannot be chowned, `IoJail` (via `?`) if a jail mount fails, plus any error from
/// `simple_jail` or `add_crosvm_user_to_jail`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use base::chown;
    use std::ffi::CString;
    use std::fs;
    use std::process;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match &mut tpm_jail {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            let pid = process::id();
            let tpm_pid_dir = format!("/run/vm/tpm.{}", pid);
            let storage = Path::new(&tpm_pid_dir).to_owned();
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.to_owned(), e))?;
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => Path::new("/tmp/tpm-simulator").to_owned(),
    };

    let dev = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail: tpm_jail,
    })
}
539
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800540fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800541 let socket = single_touch_spec
542 .get_path()
543 .into_unix_stream()
544 .map_err(|e| {
545 error!("failed configuring virtio single touch: {:?}", e);
546 e
547 })?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800548
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800549 let (width, height) = single_touch_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700550 let dev = virtio::new_single_touch(
551 socket,
552 width,
553 height,
554 virtio::base_features(cfg.protected_vm),
555 )
556 .map_err(Error::InputDeviceNew)?;
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800557 Ok(VirtioDeviceStub {
558 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800559 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800560 })
561}
562
563fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800564 let socket = trackpad_spec.get_path().into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800565 error!("failed configuring virtio trackpad: {}", e);
566 e
567 })?;
568
Kaiyi Libccb4eb2020-02-06 17:53:11 -0800569 let (width, height) = trackpad_spec.get_size();
Noah Goldd4ca29b2020-10-27 12:21:52 -0700570 let dev = virtio::new_trackpad(
571 socket,
572 width,
573 height,
574 virtio::base_features(cfg.protected_vm),
575 )
576 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800577
578 Ok(VirtioDeviceStub {
579 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800580 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800581 })
582}
583
Zach Reizner65b98f12019-11-22 17:34:58 -0800584fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
585 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800586 error!("failed configuring virtio mouse: {}", e);
587 e
588 })?;
589
Noah Goldd4ca29b2020-10-27 12:21:52 -0700590 let dev = virtio::new_mouse(socket, virtio::base_features(cfg.protected_vm))
591 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800592
593 Ok(VirtioDeviceStub {
594 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800595 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800596 })
597}
598
Zach Reizner65b98f12019-11-22 17:34:58 -0800599fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
600 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800601 error!("failed configuring virtio keyboard: {}", e);
602 e
603 })?;
604
Noah Goldd4ca29b2020-10-27 12:21:52 -0700605 let dev = virtio::new_keyboard(socket, virtio::base_features(cfg.protected_vm))
606 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800607
608 Ok(VirtioDeviceStub {
609 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800610 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800611 })
612}
613
614fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
615 let dev_file = OpenOptions::new()
616 .read(true)
617 .write(true)
618 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800619 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800620
Noah Goldd4ca29b2020-10-27 12:21:52 -0700621 let dev = virtio::new_evdev(dev_file, virtio::base_features(cfg.protected_vm))
622 .map_err(Error::InputDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800623
624 Ok(VirtioDeviceStub {
625 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800626 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800627 })
628}
629
Jakub Staron1f828d72019-04-11 12:49:29 -0700630fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100631 let dev = virtio::Balloon::new(virtio::base_features(cfg.protected_vm), socket)
632 .map_err(Error::BalloonDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800633
634 Ok(VirtioDeviceStub {
635 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800636 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800637 })
638}
639
640fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
641 // Safe because we ensure that we get a unique handle to the fd.
642 let tap = unsafe {
643 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
644 .map_err(Error::CreateTapDevice)?
645 };
646
Xiong Zhang773c7072020-03-20 10:39:55 +0800647 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
648 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700649 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800650 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
651 vq_pairs = 1;
652 }
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100653 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100654 let dev = virtio::Net::from(features, tap, vq_pairs).map_err(Error::NetDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800655
656 Ok(VirtioDeviceStub {
657 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800658 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800659 })
660}
661
662fn create_net_device(
663 cfg: &Config,
664 host_ip: Ipv4Addr,
665 netmask: Ipv4Addr,
666 mac_address: MacAddress,
667 mem: &GuestMemory,
668) -> DeviceResult {
Xiong Zhang773c7072020-03-20 10:39:55 +0800669 let mut vq_pairs = cfg.net_vq_pairs.unwrap_or(1);
670 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
Steven Richmanf32d0b42020-06-20 21:45:32 -0700671 if vcpu_count < vq_pairs as usize {
Xiong Zhang773c7072020-03-20 10:39:55 +0800672 error!("net vq pairs must be smaller than vcpu count, fall back to single queue mode");
673 vq_pairs = 1;
674 }
675
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100676 let features = virtio::base_features(cfg.protected_vm);
David Tolnay2b089fc2019-03-04 15:33:22 -0800677 let dev = if cfg.vhost_net {
Will Deacon81d5adb2020-10-06 18:37:48 +0100678 let dev = virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(
679 features,
680 host_ip,
681 netmask,
682 mac_address,
683 mem,
684 )
685 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800686 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800687 } else {
Will Deacon81d5adb2020-10-06 18:37:48 +0100688 let dev = virtio::Net::<Tap>::new(features, host_ip, netmask, mac_address, vq_pairs)
Xiong Zhang773c7072020-03-20 10:39:55 +0800689 .map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800690 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800691 };
692
693 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800694 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800695 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800696 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800697 };
698
699 Ok(VirtioDeviceStub {
700 dev,
701 jail: simple_jail(&cfg, policy)?,
702 })
703}
704
/// Creates the virtio GPU device and, when sandboxing yields a jail, populates that jail.
///
/// The display backend list always contains the X backend followed by the stub backend; a
/// Wayland backend is placed in front of them when `wayland_socket_path` is given. Inside
/// the jail the Wayland socket is visible at `/wayland-0`, so that path is handed to the
/// backend when `cfg.sandbox` is set.
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &Event,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
    map_request: Arc<Mutex<Option<ExternalMapping>>>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    let mut display_backends = Vec::new();
    if let Some(socket_path) = wayland_socket_path {
        let backend_path = if cfg.sandbox {
            jailed_wayland_path.to_path_buf()
        } else {
            socket_path.clone()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(backend_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let gpu_exit_evt = exit_evt.try_clone().map_err(Error::CloneEvent)?;
    let gpu_params = cfg.gpu_parameters.as_ref().unwrap();
    let gpu = virtio::Gpu::new(
        gpu_exit_evt,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        gpu_params,
        event_devices,
        map_request,
        cfg.sandbox,
        virtio::base_features(cfg.protected_vm),
    );

    let mut gpu_jail = simple_jail(cfg, "gpu_device")?;
    if let Some(jail) = gpu_jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for sys_dir in ["/sys/dev/char", "/sys/devices"].iter() {
            let sys_path = Path::new(sys_dir);
            jail.mount_bind(sys_path, sys_path, false)?;
        }

        let drm_dri_path = Path::new("/dev/dri");
        if drm_dri_path.exists() {
            jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
        }

        // Prepare GPU shader disk cache directory.
        if let Some(cache_dir) = gpu_params.cache_path.as_ref() {
            let caching_unsupported =
                cfg!(any(target_arch = "arm", target_arch = "aarch64")) && cfg.sandbox;
            if caching_unsupported {
                warn!("shader caching not yet supported on ARM with sandbox enabled");
                env::set_var("MESA_GLSL_CACHE_DISABLE", "true");
            } else {
                env::set_var("MESA_GLSL_CACHE_DISABLE", "false");
                env::set_var("MESA_GLSL_CACHE_DIR", cache_dir);
                if let Some(cache_size) = gpu_params.cache_size.as_ref() {
                    env::set_var("MESA_GLSL_CACHE_MAX_SIZE", cache_size);
                }
                let shadercache_path = Path::new(cache_dir);
                jail.mount_bind(shadercache_path, shadercache_path, true)?;
            }
        }

        // If the ARM specific devices exist on the host, bind mount them in.
        for arm_dev in ["/dev/mali0", "/dev/pvr_sync"].iter() {
            let arm_dev_path = Path::new(arm_dev);
            if arm_dev_path.exists() {
                jail.mount_bind(arm_dev_path, arm_dev_path, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        for lib_dir in ["/usr/lib", "/usr/lib64", "/lib", "/lib64"].iter() {
            let lib_path = Path::new(lib_dir);
            if lib_path.exists() {
                jail.mount_bind(lib_path, lib_path, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(host_socket) = wayland_socket_path {
            jail.mount_bind(host_socket, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        jail.mount(proc_path, proc_path, "proc", proc_flags)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail: gpu_jail,
    })
}
841
842fn create_wayland_device(
843 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700844 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800845 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
846) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900847 let wayland_socket_dirs = cfg
848 .wayland_socket_paths
849 .iter()
850 .map(|(_name, path)| path.parent())
851 .collect::<Option<Vec<_>>>()
852 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800853
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100854 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100855 let dev = virtio::Wl::new(
856 features,
857 cfg.wayland_socket_paths.clone(),
858 socket,
859 resource_bridge,
860 )
861 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800862
Matt Delco45caf912019-11-13 08:11:09 -0800863 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800864 Some(mut jail) => {
865 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
866 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
867 jail.mount_with_data(
868 Path::new("none"),
869 Path::new("/"),
870 "tmpfs",
871 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
872 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800873 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800874
875 // Bind mount the wayland socket's directory into jail's root. This is necessary since
876 // each new wayland context must open() the socket. If the wayland socket is ever
877 // destroyed and remade in the same host directory, new connections will be possible
878 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900879 for dir in &wayland_socket_dirs {
880 jail.mount_bind(dir, dir, true)?;
881 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800882 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
883
884 Some(jail)
885 }
886 None => None,
887 };
888
889 Ok(VirtioDeviceStub {
890 dev: Box::new(dev),
891 jail,
892 })
893}
894
/// Creates a virtio video device of the given type and, when sandboxing yields a jail,
/// bind mounts the host paths the device needs into it.
///
/// The jail user is registered under "video-decoder" or "video-encoder" depending on `typ`.
/// `resource_bridge` is handed to the device as its resource bridge.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn create_video_device(
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
    resource_bridge: virtio::resource_bridge::ResourceRequestSocket,
) -> DeviceResult {
    let mut video_jail = simple_jail(cfg, "video_device")?;
    if let Some(jail) = video_jail.as_mut() {
        let jail_user = match typ {
            devices::virtio::VideoDeviceType::Decoder => "video-decoder",
            devices::virtio::VideoDeviceType::Encoder => "video-encoder",
        };
        add_crosvm_user_to_jail(jail, jail_user)?;

        // Create a tmpfs in the device's root directory so that we can bind mount files.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Render node for libvda.
        let render_node = Path::new("/dev/dri/renderD128");
        jail.mount_bind(render_node, render_node, false)?;

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            // Device nodes used by libdrm through minigbm in libvda on AMD devices, followed
            // by the directory required for loading dri libraries loaded by minigbm on AMD
            // devices.
            for host_dir in ["/sys/dev/char", "/sys/devices", "/usr/lib64"].iter() {
                let host_path = Path::new(host_dir);
                jail.mount_bind(host_path, host_path, false)?;
            }
        }

        // Device nodes required by libchrome which establishes Mojo connection in libvda.
        let urandom = Path::new("/dev/urandom");
        jail.mount_bind(urandom, urandom, false)?;
        let system_bus_socket = Path::new("/run/dbus/system_bus_socket");
        jail.mount_bind(system_bus_socket, system_bus_socket, true)?;
    }

    let video = devices::virtio::VideoDevice::new(
        virtio::base_features(cfg.protected_vm),
        typ,
        Some(resource_bridge),
    );

    Ok(VirtioDeviceStub {
        dev: Box::new(video),
        jail: video_jail,
    })
}
958
/// Creates a video device of type `typ` and appends it to `devs`.
///
/// A resource bridge socket pair is created first: one end is appended to
/// `resource_bridges` and the other end is given to the new video device. The bridge end is
/// pushed before the device is created, so it stays in `resource_bridges` even if device
/// creation fails.
#[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
fn register_video_device(
    devs: &mut Vec<VirtioDeviceStub>,
    resource_bridges: &mut Vec<virtio::resource_bridge::ResourceResponseSocket>,
    cfg: &Config,
    typ: devices::virtio::VideoDeviceType,
) -> std::result::Result<(), Error> {
    let (device_end, bridge_end) =
        virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
    resource_bridges.push(bridge_end);

    let video_stub = create_video_device(cfg, typ, device_end)?;
    devs.push(video_stub);
    Ok(())
}
972
David Tolnay2b089fc2019-03-04 15:33:22 -0800973fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
Will Deacon7d2b8ac2020-10-06 18:51:12 +0100974 let features = virtio::base_features(cfg.protected_vm);
Will Deacon81d5adb2020-10-06 18:37:48 +0100975 let dev = virtio::vhost::Vsock::new(features, cid, mem).map_err(Error::VhostVsockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800976
977 Ok(VirtioDeviceStub {
978 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800979 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800980 })
981}
982
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900983fn create_fs_device(
984 cfg: &Config,
985 uid_map: &str,
986 gid_map: &str,
987 src: &Path,
988 tag: &str,
989 fs_cfg: virtio::fs::passthrough::Config,
990) -> DeviceResult {
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900991 let max_open_files = get_max_open_files()?;
Matt Delcoc24ad782020-02-14 13:24:36 -0800992 let j = if cfg.sandbox {
993 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
994 let config = SandboxConfig {
995 limit_caps: false,
996 uid_map: Some(uid_map),
997 gid_map: Some(gid_map),
998 log_failures: cfg.seccomp_log_failures,
999 seccomp_policy: &seccomp_policy,
1000 };
Chirantan Ekbote34d45e52020-04-20 18:15:02 +09001001 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1002 // We want bind mounts from the parent namespaces to propagate into the fs device's
1003 // namespace.
1004 jail.set_remount_mode(libc::MS_SLAVE);
1005
1006 jail
Matt Delcoc24ad782020-02-14 13:24:36 -08001007 } else {
1008 create_base_minijail(src, Some(max_open_files), None)?
1009 };
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001010
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001011 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001012 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
1013 // when num_queues > 1.
Will Deacon81d5adb2020-10-06 18:37:48 +01001014 let dev = virtio::fs::Fs::new(features, tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001015
1016 Ok(VirtioDeviceStub {
1017 dev: Box::new(dev),
1018 jail: Some(j),
1019 })
1020}
1021
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001022fn create_9p_device(
1023 cfg: &Config,
1024 uid_map: &str,
1025 gid_map: &str,
1026 src: &Path,
1027 tag: &str,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001028 mut p9_cfg: p9::Config,
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001029) -> DeviceResult {
1030 let max_open_files = get_max_open_files()?;
1031 let (jail, root) = if cfg.sandbox {
1032 let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device");
1033 let config = SandboxConfig {
1034 limit_caps: false,
1035 uid_map: Some(uid_map),
1036 gid_map: Some(gid_map),
1037 log_failures: cfg.seccomp_log_failures,
1038 seccomp_policy: &seccomp_policy,
1039 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001040
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001041 let mut jail = create_base_minijail(src, Some(max_open_files), Some(&config))?;
1042 // We want bind mounts from the parent namespaces to propagate into the 9p server's
1043 // namespace.
1044 jail.set_remount_mode(libc::MS_SLAVE);
Chirantan Ekbote055de382020-01-24 12:16:58 +09001045
Chirantan Ekbotec6b73e32020-02-20 15:53:06 +09001046 // The shared directory becomes the root of the device's file system.
1047 let root = Path::new("/");
1048 (Some(jail), root)
1049 } else {
1050 // There's no mount namespace so we tell the server to treat the source directory as the
1051 // root.
1052 (None, src)
David Tolnay2b089fc2019-03-04 15:33:22 -08001053 };
1054
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001055 let features = virtio::base_features(cfg.protected_vm);
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001056 p9_cfg.root = root.into();
1057 let dev = virtio::P9::new(features, tag, p9_cfg).map_err(Error::P9DeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -08001058
1059 Ok(VirtioDeviceStub {
1060 dev: Box::new(dev),
1061 jail,
1062 })
1063}
1064
Jakub Starona3411ea2019-04-24 10:55:25 -07001065fn create_pmem_device(
1066 cfg: &Config,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001067 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001068 resources: &mut SystemAllocator,
1069 disk: &DiskOption,
1070 index: usize,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001071 pmem_device_socket: VmMsyncRequestSocket,
Jakub Starona3411ea2019-04-24 10:55:25 -07001072) -> DeviceResult {
1073 let fd = OpenOptions::new()
1074 .read(true)
1075 .write(!disk.read_only)
1076 .open(&disk.path)
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001077 .map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001078
Iliyan Malcheved149862020-04-17 23:57:47 +00001079 let arena_size = {
Daniel Verkamp46d61ba2020-02-25 10:17:50 -08001080 let metadata =
1081 std::fs::metadata(&disk.path).map_err(|e| Error::Disk(disk.path.to_path_buf(), e))?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001082 let disk_len = metadata.len();
1083 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
1084 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
1085 // we just align the size of the file to 2 MiB then access beyond the last page of the
1086 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
1087 // padding up to 2 MiB.
1088 let alignment = 2 * 1024 * 1024;
1089 let align_adjust = if disk_len % alignment != 0 {
1090 alignment - (disk_len % alignment)
1091 } else {
1092 0
1093 };
Iliyan Malcheved149862020-04-17 23:57:47 +00001094 disk_len
1095 .checked_add(align_adjust)
1096 .ok_or(Error::PmemDeviceImageTooBig)?
Jakub Starona3411ea2019-04-24 10:55:25 -07001097 };
1098
1099 let protection = {
1100 if disk.read_only {
1101 Protection::read()
1102 } else {
1103 Protection::read_write()
1104 }
1105 };
1106
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001107 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -07001108 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001109 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001110
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001111 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
1112 arena
Iliyan Malcheved149862020-04-17 23:57:47 +00001113 .add_fd_offset_protection(0, arena_size, &fd, 0, protection)
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001114 .map_err(Error::ReservePmemMemory)?;
1115 arena
Jakub Starona3411ea2019-04-24 10:55:25 -07001116 };
1117
1118 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001119 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -07001120 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -08001121 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -07001122 Alloc::PmemDevice(index),
1123 format!("pmem_disk_image_{}", index),
1124 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
1125 128 * 1024 * 1024, /* 128 MiB */
1126 )
1127 .map_err(Error::AllocatePmemDeviceAddress)?;
1128
Daniel Verkampe1980a92020-02-07 11:00:55 -08001129 let slot = vm
Gurchetan Singh173fe622020-05-21 18:05:06 -07001130 .add_memory_region(
Daniel Verkampe1980a92020-02-07 11:00:55 -08001131 GuestAddress(mapping_address),
Gurchetan Singh173fe622020-05-21 18:05:06 -07001132 Box::new(arena),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001133 /* read_only = */ disk.read_only,
1134 /* log_dirty_pages = */ false,
1135 )
1136 .map_err(Error::AddPmemDeviceMemory)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001137
Daniel Verkampe1980a92020-02-07 11:00:55 -08001138 let dev = virtio::Pmem::new(
Will Deacon7d2b8ac2020-10-06 18:51:12 +01001139 virtio::base_features(cfg.protected_vm),
Daniel Verkampe1980a92020-02-07 11:00:55 -08001140 fd,
1141 GuestAddress(mapping_address),
1142 slot,
1143 arena_size,
1144 Some(pmem_device_socket),
1145 )
1146 .map_err(Error::PmemDeviceNew)?;
Jakub Starona3411ea2019-04-24 10:55:25 -07001147
1148 Ok(VirtioDeviceStub {
1149 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -08001150 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001151 })
1152}
1153
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001154fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult {
Michael Hoylecd23bc22020-10-20 22:12:20 -07001155 let mut keep_rds = Vec::new();
Michael Hoyle685316f2020-09-16 15:29:20 -07001156 let evt = Event::new().map_err(Error::CreateEvent)?;
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001157 let dev = param
Michael Hoylecd23bc22020-10-20 22:12:20 -07001158 .create_serial_device::<Console>(cfg.protected_vm, &evt, &mut keep_rds)
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001159 .map_err(Error::CreateConsole)?;
1160
Nicholas Verne71e73d82020-07-08 17:19:55 +10001161 let jail = match simple_jail(&cfg, "serial")? {
1162 Some(mut jail) => {
1163 // Create a tmpfs in the device's root directory so that we can bind mount the
1164 // log socket directory into it.
1165 // The size=67108864 is size=64*1024*1024 or size=64MB.
1166 jail.mount_with_data(
1167 Path::new("none"),
1168 Path::new("/"),
1169 "tmpfs",
1170 (libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID) as usize,
1171 "size=67108864",
1172 )?;
1173 add_crosvm_user_to_jail(&mut jail, "serial")?;
1174 let res = param.add_bind_mounts(&mut jail);
1175 if res.is_err() {
1176 error!("failed to add bind mounts for console device");
1177 }
1178 Some(jail)
1179 }
1180 None => None,
1181 };
1182
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001183 Ok(VirtioDeviceStub {
1184 dev: Box::new(dev),
Nicholas Verne71e73d82020-07-08 17:19:55 +10001185 jail, // TODO(dverkamp): use a separate policy for console?
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001186 })
1187}
1188
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -07001189// gpu_device_socket is not used when GPU support is disabled.
1190#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001191fn create_virtio_devices(
1192 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -07001193 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001194 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001195 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001196 _exit_evt: &Event,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001197 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001198 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001199 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001200 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001201 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001202 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnay2b089fc2019-03-04 15:33:22 -08001203) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -07001204 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001205
Daniel Verkampa7b6a1c2020-03-09 13:16:46 -07001206 for (_, param) in cfg
1207 .serial_parameters
1208 .iter()
1209 .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
1210 {
1211 let dev = create_console_device(cfg, param)?;
1212 devs.push(dev);
1213 }
1214
Zach Reizner8fb52112017-12-13 16:04:39 -08001215 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001216 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -08001217 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001218 }
1219
Jakub Starona3411ea2019-04-24 10:55:25 -07001220 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
Daniel Verkampe1980a92020-02-07 11:00:55 -08001221 let pmem_device_socket = pmem_device_sockets.remove(0);
1222 devs.push(create_pmem_device(
1223 cfg,
1224 vm,
1225 resources,
1226 pmem_disk,
1227 index,
1228 pmem_device_socket,
1229 )?);
Jakub Starona3411ea2019-04-24 10:55:25 -07001230 }
1231
David Tolnay2b089fc2019-03-04 15:33:22 -08001232 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001233
David Tolnayde6b29a2018-12-20 11:49:46 -08001234 #[cfg(feature = "tpm")]
1235 {
David Tolnay43f8e212019-02-13 17:28:16 -08001236 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -08001237 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -08001238 }
David Tolnayde6b29a2018-12-20 11:49:46 -08001239 }
1240
Jorge E. Moreira99d3f082019-03-07 10:59:54 -08001241 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
1242 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
1243 }
1244
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001245 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -08001246 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001247 }
1248
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001249 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -08001250 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001251 }
1252
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001253 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -08001254 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001255 }
1256
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001257 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -08001258 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001259 }
1260
David Tolnay2b089fc2019-03-04 15:33:22 -08001261 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -08001262
Zach Reizner39aa26b2017-12-12 18:03:23 -08001263 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -08001264 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -08001265 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -08001266 }
1267
David Tolnay2b089fc2019-03-04 15:33:22 -08001268 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
1269 (cfg.host_ip, cfg.netmask, cfg.mac_address)
1270 {
1271 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001272 }
1273
David Tolnayfa701712019-02-13 16:42:54 -08001274 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001275 let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();
1276
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001277 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001278 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
1279 let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;
1280
1281 #[cfg(feature = "gpu")]
1282 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001283 if cfg.gpu_parameters.is_some() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001284 let (wl_socket, gpu_socket) =
1285 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
1286 resource_bridges.push(gpu_socket);
1287 wl_resource_bridge = Some(wl_socket);
1288 }
1289 }
1290
1291 devs.push(create_wayland_device(
1292 cfg,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001293 wayland_device_socket,
1294 wl_resource_bridge,
1295 )?);
1296 }
David Tolnayfa701712019-02-13 16:42:54 -08001297
Keiichi Watanabe57df6a02019-12-06 22:24:40 +09001298 #[cfg(feature = "video-decoder")]
1299 {
1300 if cfg.video_dec {
1301 register_video_device(
1302 &mut devs,
1303 &mut resource_bridges,
1304 cfg,
1305 devices::virtio::VideoDeviceType::Decoder,
1306 )?;
1307 }
1308 }
1309
1310 #[cfg(feature = "video-encoder")]
1311 {
1312 if cfg.video_enc {
1313 register_video_device(
1314 &mut devs,
1315 &mut resource_bridges,
1316 cfg,
1317 devices::virtio::VideoDeviceType::Encoder,
1318 )?;
1319 }
1320 }
1321
Zach Reizner3a8100a2017-09-13 19:15:43 -07001322 #[cfg(feature = "gpu")]
1323 {
Noah Golddc7f52b2020-02-01 13:01:58 -08001324 if let Some(gpu_parameters) = &cfg.gpu_parameters {
Zach Reizner65b98f12019-11-22 17:34:58 -08001325 let mut event_devices = Vec::new();
1326 if cfg.display_window_mouse {
1327 let (event_device_socket, virtio_dev_socket) =
1328 UnixStream::pair().map_err(Error::CreateSocket)?;
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001329 let (single_touch_width, single_touch_height) = cfg
1330 .virtio_single_touch
1331 .as_ref()
1332 .map(|single_touch_spec| single_touch_spec.get_size())
Noah Golddc7f52b2020-02-01 13:01:58 -08001333 .unwrap_or((gpu_parameters.display_width, gpu_parameters.display_height));
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001334 let dev = virtio::new_single_touch(
1335 virtio_dev_socket,
1336 single_touch_width,
1337 single_touch_height,
Noah Goldd4ca29b2020-10-27 12:21:52 -07001338 virtio::base_features(cfg.protected_vm),
Kaiyi Libccb4eb2020-02-06 17:53:11 -08001339 )
1340 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001341 devs.push(VirtioDeviceStub {
1342 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001343 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001344 });
1345 event_devices.push(EventDevice::touchscreen(event_device_socket));
1346 }
1347 if cfg.display_window_keyboard {
1348 let (event_device_socket, virtio_dev_socket) =
1349 UnixStream::pair().map_err(Error::CreateSocket)?;
Noah Goldd4ca29b2020-10-27 12:21:52 -07001350 let dev = virtio::new_keyboard(
1351 virtio_dev_socket,
1352 virtio::base_features(cfg.protected_vm),
1353 )
1354 .map_err(Error::InputDeviceNew)?;
Zach Reizner65b98f12019-11-22 17:34:58 -08001355 devs.push(VirtioDeviceStub {
1356 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001357 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001358 });
1359 event_devices.push(EventDevice::keyboard(event_device_socket));
1360 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001361 devs.push(create_gpu_device(
1362 cfg,
1363 _exit_evt,
1364 gpu_device_socket,
1365 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001366 // Use the unnamed socket for GPU display screens.
1367 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001368 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001369 event_devices,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001370 map_request,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001371 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001372 }
1373 }
1374
Zach Reizneraa575662018-08-15 10:46:32 -07001375 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001376 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001377 }
1378
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001379 for shared_dir in &cfg.shared_dirs {
1380 let SharedDir {
1381 src,
1382 tag,
1383 kind,
1384 uid_map,
1385 gid_map,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001386 fs_cfg,
1387 p9_cfg,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001388 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001389
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001390 let dev = match kind {
1391 SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
Chirantan Ekbote75ba8752020-10-27 18:33:02 +09001392 SharedDirKind::P9 => create_9p_device(cfg, uid_map, gid_map, src, tag, p9_cfg.clone())?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001393 };
1394 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001395 }
1396
1397 Ok(devs)
1398}
1399
1400fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001401 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001402 mem: &GuestMemory,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001403 vm: &mut impl Vm,
Jakub Starona3411ea2019-04-24 10:55:25 -07001404 resources: &mut SystemAllocator,
Michael Hoyle685316f2020-09-16 15:29:20 -07001405 exit_evt: &Event,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001406 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001407 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001408 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001409 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001410 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001411 pmem_device_sockets: &mut Vec<VmMsyncRequestSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001412 usb_provider: HostBackendDeviceProvider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001413 map_request: Arc<Mutex<Option<ExternalMapping>>>,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001414) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001415 let stubs = create_virtio_devices(
1416 &cfg,
1417 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001418 vm,
1419 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001420 exit_evt,
1421 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001422 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001423 balloon_device_socket,
1424 disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08001425 pmem_device_sockets,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08001426 map_request,
David Tolnay2b089fc2019-03-04 15:33:22 -08001427 )?;
1428
1429 let mut pci_devices = Vec::new();
1430
1431 for stub in stubs {
Daniel Verkampbb712d62019-11-19 09:47:33 -08001432 let (msi_host_socket, msi_device_socket) =
1433 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1434 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1435 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
1436 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001437 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001438 pci_devices.push((dev, stub.jail));
1439 }
1440
Andrew Scull1590e6f2020-03-18 18:00:47 +00001441 #[cfg(feature = "audio")]
Judy Hsiaod5c1e962020-02-04 12:30:01 +08001442 for ac97_param in &cfg.ac97_parameters {
1443 let dev = Ac97Dev::try_new(mem.clone(), ac97_param.clone()).map_err(Error::CreateAc97)?;
paulhsiace17e6e2020-08-28 18:37:45 +08001444 let jail = simple_jail(&cfg, dev.minijail_policy())?;
1445 pci_devices.push((Box::new(dev), jail));
David Tolnay2b089fc2019-03-04 15:33:22 -08001446 }
Andrew Scull1590e6f2020-03-18 18:00:47 +00001447
Jingkui Wang100e6e42019-03-08 20:41:57 -08001448 // Create xhci controller.
1449 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001450 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001451
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001452 if !cfg.vfio.is_empty() {
Xiong Zhangea6cf662019-11-11 18:32:02 +08001453 let vfio_container = Arc::new(Mutex::new(
1454 VfioContainer::new().map_err(Error::CreateVfioDevice)?,
1455 ));
1456
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001457 for vfio_path in &cfg.vfio {
Daniel Verkamp10154a92020-09-28 17:44:40 -07001458 // create MSI, MSI-X, and Mem request sockets for each vfio device
1459 let (vfio_host_socket_msi, vfio_device_socket_msi) =
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001460 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
Daniel Verkamp10154a92020-09-28 17:44:40 -07001461 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msi));
1462
1463 let (vfio_host_socket_msix, vfio_device_socket_msix) =
1464 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1465 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_msix));
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001466
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001467 let (vfio_host_socket_mem, vfio_device_socket_mem) =
1468 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>()
1469 .map_err(Error::CreateSocket)?;
1470 control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));
Xiong Zhang85abeff2019-04-23 17:15:24 +08001471
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001472 let vfiodevice = VfioDevice::new(vfio_path.as_path(), vm, mem, vfio_container.clone())
1473 .map_err(Error::CreateVfioDevice)?;
1474 let vfiopcidevice = Box::new(VfioPciDevice::new(
1475 vfiodevice,
Daniel Verkamp10154a92020-09-28 17:44:40 -07001476 vfio_device_socket_msi,
1477 vfio_device_socket_msix,
Xiong Zhang8bb4faa2019-11-12 10:06:13 +08001478 vfio_device_socket_mem,
1479 ));
1480 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
1481 }
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001482 }
1483
David Tolnay2b089fc2019-03-04 15:33:22 -08001484 Ok(pci_devices)
1485}
1486
1487#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001488#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001489struct Ids {
1490 uid: uid_t,
1491 gid: gid_t,
1492}
1493
David Tolnay48c48292019-03-01 16:54:25 -08001494// Set the uid/gid for the jailed process and give a basic id map. This is
1495// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001496fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001497 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1498
1499 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1500 Ok(u) => u,
1501 Err(e) => {
1502 warn!("falling back to current user id for {}: {}", feature, e);
1503 geteuid()
1504 }
1505 };
1506
1507 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1508 Ok(u) => u,
1509 Err(e) => {
1510 warn!("falling back to current group id for {}: {}", feature, e);
1511 getegid()
1512 }
1513 };
1514
1515 jail.change_uid(crosvm_uid);
1516 jail.change_gid(crosvm_gid);
1517 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1518 .map_err(Error::SettingUidMap)?;
1519 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1520 .map_err(Error::SettingGidMap)?;
1521
David Tolnay41a6f842019-03-01 16:18:44 -08001522 Ok(Ids {
1523 uid: crosvm_uid,
1524 gid: crosvm_gid,
1525 })
David Tolnay48c48292019-03-01 16:54:25 -08001526}
1527
David Tolnayfd0971d2019-03-04 17:15:57 -08001528fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001529 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001530 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001531 }
1532 let raw_fd = path
1533 .file_name()
1534 .and_then(|fd_osstr| fd_osstr.to_str())
1535 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1536 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001537 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001538}
1539
Zach Reizner65b98f12019-11-22 17:34:58 -08001540trait IntoUnixStream {
1541 fn into_unix_stream(self) -> Result<UnixStream>;
1542}
1543
1544impl<'a> IntoUnixStream for &'a Path {
1545 fn into_unix_stream(self) -> Result<UnixStream> {
1546 if self.parent() == Some(Path::new("/proc/self/fd")) {
1547 // Safe because we will validate |raw_fd|.
1548 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(self)?)) }
1549 } else {
1550 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1551 }
1552 }
1553}
1554impl<'a> IntoUnixStream for &'a PathBuf {
1555 fn into_unix_stream(self) -> Result<UnixStream> {
1556 self.as_path().into_unix_stream()
1557 }
1558}
1559
1560impl IntoUnixStream for UnixStream {
1561 fn into_unix_stream(self) -> Result<UnixStream> {
1562 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001563 }
1564}
1565
Steven Richmanf32d0b42020-06-20 21:45:32 -07001566fn setup_vcpu_signal_handler<T: Vcpu>(use_hypervisor_signals: bool) -> Result<()> {
1567 if use_hypervisor_signals {
Matt Delco84cf9c02019-10-07 22:38:13 -07001568 unsafe {
1569 extern "C" fn handle_signal() {}
1570 // Our signal handler does nothing and is trivially async signal safe.
1571 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1572 .map_err(Error::RegisterSignalHandler)?;
1573 }
1574 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1575 } else {
1576 unsafe {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001577 extern "C" fn handle_signal<T: Vcpu>() {
1578 T::set_local_immediate_exit(true);
Matt Delco84cf9c02019-10-07 22:38:13 -07001579 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001580 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal::<T>)
Matt Delco84cf9c02019-10-07 22:38:13 -07001581 .map_err(Error::RegisterSignalHandler)?;
1582 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001583 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001584 Ok(())
1585}
1586
Steven Richmanf32d0b42020-06-20 21:45:32 -07001587// Sets up a vcpu and converts it into a runnable vcpu.
Zach Reizner2c770e62020-09-30 16:49:59 -07001588fn runnable_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001589 cpu_id: usize,
1590 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001591 vm: impl VmArch,
1592 irq_chip: &mut impl IrqChipArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001593 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001594 run_rt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001595 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001596 no_smt: bool,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001597 has_bios: bool,
1598 use_hypervisor_signals: bool,
Zach Reizner2c770e62020-09-30 16:49:59 -07001599) -> Result<(V, VcpuRunHandle)>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001600where
Zach Reizner2c770e62020-09-30 16:49:59 -07001601 V: VcpuArch,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001602{
Zach Reizner304e7312020-09-29 16:00:24 -07001603 let mut vcpu = match vcpu {
1604 Some(v) => v,
1605 None => {
1606 // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
1607 // the vcpu thread.
1608 match vm
1609 .create_vcpu(cpu_id)
1610 .map_err(Error::CreateVcpu)?
1611 .downcast::<V>()
1612 {
1613 Ok(v) => *v,
1614 Err(_) => panic!("VM created wrong type of VCPU"),
1615 }
1616 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001617 };
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001618
Steven Richmanf32d0b42020-06-20 21:45:32 -07001619 irq_chip
Zach Reizner304e7312020-09-29 16:00:24 -07001620 .add_vcpu(cpu_id, &vcpu)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001621 .map_err(Error::AddIrqChipVcpu)?;
1622
Daniel Verkampcaf9ced2020-09-29 15:35:02 -07001623 if !vcpu_affinity.is_empty() {
1624 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1625 error!("Failed to set CPU affinity: {}", e);
1626 }
1627 }
1628
Steven Richmanf32d0b42020-06-20 21:45:32 -07001629 Arch::configure_vcpu(
1630 vm.get_memory(),
1631 vm.get_hypervisor(),
1632 irq_chip,
1633 &mut vcpu,
1634 cpu_id,
1635 vcpu_count,
1636 has_bios,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001637 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001638 )
1639 .map_err(Error::ConfigureVcpu)?;
1640
Steven Richmanf32d0b42020-06-20 21:45:32 -07001641 #[cfg(feature = "chromeos")]
1642 if let Err(e) = base::sched::enable_core_scheduling() {
1643 error!("Failed to enable core scheduling: {}", e);
1644 }
1645
Kansho Nishidaab205af2020-08-13 18:17:50 +09001646 if run_rt {
1647 const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
1648 if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
1649 .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
1650 {
1651 warn!("Failed to set vcpu to real time: {}", e);
1652 }
1653 }
1654
Steven Richmanf32d0b42020-06-20 21:45:32 -07001655 if use_hypervisor_signals {
1656 let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
1657 v.retain(|&x| x != SIGRTMIN() + 0);
1658 vcpu.set_signal_mask(&v).map_err(Error::SettingSignalMask)?;
1659 }
1660
Zach Reizner2c770e62020-09-30 16:49:59 -07001661 let vcpu_run_handle = vcpu
1662 .take_run_handle(Some(SIGRTMIN() + 0))
1663 .map_err(Error::RunnableVcpu)?;
1664
1665 Ok((vcpu, vcpu_run_handle))
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001666}
1667
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001668#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Zach Reizner304e7312020-09-29 16:00:24 -07001669fn inject_interrupt(irq_chip: &mut dyn IrqChipX86_64, vcpu: &dyn VcpuX86_64, vcpu_id: usize) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001670 if !irq_chip.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
1671 return;
1672 }
1673
1674 let vector = irq_chip
1675 .get_external_interrupt(vcpu_id)
1676 .unwrap_or_else(|e| {
1677 error!("get_external_interrupt failed on vcpu {}: {}", vcpu_id, e);
1678 None
1679 });
1680 if let Some(vector) = vector {
1681 if let Err(e) = vcpu.interrupt(vector as u32) {
1682 error!(
1683 "Failed to inject interrupt {} to vcpu {}: {}",
1684 vector, vcpu_id, e
1685 );
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001686 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001687 }
1688
1689 // The second interrupt request should be handled immediately, so ask vCPU to exit as soon as
1690 // possible.
1691 if irq_chip.interrupt_requested(vcpu_id) {
Steven Richmance33ace2020-09-02 19:25:33 -07001692 vcpu.set_interrupt_window_requested(true);
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001693 }
1694}
1695
// ARM counterpart of the x86 `inject_interrupt`; it does nothing.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
fn inject_interrupt(_irq_chip: &mut dyn IrqChip, _vcpu: &dyn Vcpu, _vcpu_id: usize) {}
Zhuocheng Dingdb4c70d2019-12-02 15:50:24 +08001698
Zach Reizner2c770e62020-09-30 16:49:59 -07001699fn run_vcpu<V>(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001700 cpu_id: usize,
1701 vcpu: Option<V>,
Zach Reizner304e7312020-09-29 16:00:24 -07001702 vm: impl VmArch + 'static,
1703 mut irq_chip: impl IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001704 vcpu_count: usize,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001705 run_rt: bool,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001706 vcpu_affinity: Vec<usize>,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001707 no_smt: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001708 start_barrier: Arc<Barrier>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001709 has_bios: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001710 io_bus: devices::Bus,
1711 mmio_bus: devices::Bus,
Michael Hoyle685316f2020-09-16 15:29:20 -07001712 exit_evt: Event,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001713 requires_pvclock_ctrl: bool,
Dylan Reidb0492662019-05-17 14:50:13 -07001714 from_main_channel: mpsc::Receiver<VmRunMode>,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001715 use_hypervisor_signals: bool,
1716) -> Result<JoinHandle<()>>
1717where
Zach Reizner2c770e62020-09-30 16:49:59 -07001718 V: VcpuArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001719{
Zach Reizner8fb52112017-12-13 16:04:39 -08001720 thread::Builder::new()
1721 .name(format!("crosvm_vcpu{}", cpu_id))
1722 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08001723 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
1724 // implementation accomplishes that.
1725 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
1726
Zach Reizner2c770e62020-09-30 16:49:59 -07001727 let runnable_vcpu = runnable_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07001728 cpu_id,
1729 vcpu,
1730 vm,
1731 &mut irq_chip,
1732 vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09001733 run_rt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001734 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09001735 no_smt,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001736 has_bios,
1737 use_hypervisor_signals,
1738 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001739
Zach Reizner8fb52112017-12-13 16:04:39 -08001740 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001741
Zach Reizner2c770e62020-09-30 16:49:59 -07001742 let (vcpu, vcpu_run_handle) = match runnable_vcpu {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001743 Ok(v) => v,
1744 Err(e) => {
1745 error!("failed to start vcpu {}: {}", cpu_id, e);
1746 return;
1747 }
1748 };
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001749
Dylan Reidb0492662019-05-17 14:50:13 -07001750 let mut run_mode = VmRunMode::Running;
1751 let mut interrupted_by_signal = false;
1752
1753 'vcpu_loop: loop {
1754 // Start by checking for messages to process and the run state of the CPU.
1755 // An extra check here for Running so there isn't a need to call recv unless a
1756 // message is likely to be ready because a signal was sent.
1757 if interrupted_by_signal || run_mode != VmRunMode::Running {
1758 'state_loop: loop {
1759 // Tries to get a pending message without blocking first.
1760 let msg = match from_main_channel.try_recv() {
1761 Ok(m) => m,
1762 Err(mpsc::TryRecvError::Empty) if run_mode == VmRunMode::Running => {
1763 // If the VM is running and no message is pending, the state won't
1764 // be changed.
1765 break 'state_loop;
1766 }
1767 Err(mpsc::TryRecvError::Empty) => {
1768 // If the VM is not running, wait until a message is ready.
1769 match from_main_channel.recv() {
1770 Ok(m) => m,
1771 Err(mpsc::RecvError) => {
1772 error!("Failed to read from main channel in vcpu");
1773 break 'vcpu_loop;
1774 }
1775 }
1776 }
1777 Err(mpsc::TryRecvError::Disconnected) => {
1778 error!("Failed to read from main channel in vcpu");
1779 break 'vcpu_loop;
1780 }
1781 };
1782
1783 // Collect all pending messages.
1784 let mut messages = vec![msg];
1785 messages.append(&mut from_main_channel.try_iter().collect());
1786
1787 for new_mode in messages {
1788 run_mode = new_mode.clone();
1789 match run_mode {
1790 VmRunMode::Running => break 'state_loop,
1791 VmRunMode::Suspending => {
1792 // On KVM implementations that use a paravirtualized clock (e.g.
1793 // x86), a flag must be set to indicate to the guest kernel that a
1794 // VCPU was suspended. The guest kernel will use this flag to
1795 // prevent the soft lockup detection from triggering when this VCPU
1796 // resumes, which could happen days later in realtime.
1797 if requires_pvclock_ctrl {
1798 if let Err(e) = vcpu.pvclock_ctrl() {
1799 error!(
1800 "failed to tell hypervisor vcpu {} is suspending: {}",
1801 cpu_id, e
1802 );
1803 }
1804 }
1805 }
1806 VmRunMode::Exiting => break 'vcpu_loop,
1807 }
1808 }
1809 }
1810 }
1811
1812 interrupted_by_signal = false;
1813
Zach Reizner2c770e62020-09-30 16:49:59 -07001814 match vcpu.run(&vcpu_run_handle) {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001815 Ok(VcpuExit::IoIn { port, mut size }) => {
1816 let mut data = [0; 8];
1817 if size > data.len() {
1818 error!("unsupported IoIn size of {} bytes", size);
1819 size = data.len();
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001820 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001821 io_bus.read(port as u64, &mut data[..size]);
1822 if let Err(e) = vcpu.set_data(&data[..size]) {
1823 error!("failed to set return data for IoIn: {}", e);
1824 }
1825 }
1826 Ok(VcpuExit::IoOut {
1827 port,
1828 mut size,
1829 data,
1830 }) => {
1831 if size > data.len() {
1832 error!("unsupported IoOut size of {} bytes", size);
1833 size = data.len();
1834 }
1835 io_bus.write(port as u64, &data[..size]);
1836 }
1837 Ok(VcpuExit::MmioRead { address, size }) => {
1838 let mut data = [0; 8];
1839 mmio_bus.read(address, &mut data[..size]);
1840 // Setting data for mmio can not fail.
1841 let _ = vcpu.set_data(&data[..size]);
1842 }
1843 Ok(VcpuExit::MmioWrite {
1844 address,
1845 size,
1846 data,
1847 }) => {
1848 mmio_bus.write(address, &data[..size]);
1849 }
1850 Ok(VcpuExit::IoapicEoi { vector }) => {
1851 if let Err(e) = irq_chip.broadcast_eoi(vector) {
1852 error!(
1853 "failed to broadcast eoi {} on vcpu {}: {}",
1854 vector, cpu_id, e
1855 );
1856 }
1857 }
1858 Ok(VcpuExit::Hlt) => break,
1859 Ok(VcpuExit::Shutdown) => break,
1860 Ok(VcpuExit::FailEntry {
1861 hardware_entry_failure_reason,
1862 }) => {
1863 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
1864 break;
1865 }
1866 Ok(VcpuExit::SystemEvent(_, _)) => break,
1867 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1868 Err(e) => match e.errno() {
1869 libc::EINTR => interrupted_by_signal = true,
1870 libc::EAGAIN => {}
1871 _ => {
1872 error!("vcpu hit unknown error: {}", e);
1873 break;
1874 }
1875 },
1876 }
1877
1878 if interrupted_by_signal {
1879 if use_hypervisor_signals {
1880 // Try to clear the signal that we use to kick VCPU if it is pending before
1881 // attempting to handle pause requests.
1882 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
1883 error!("failed to clear pending signal: {}", e);
1884 break;
1885 }
1886 } else {
1887 vcpu.set_immediate_exit(false);
1888 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001889 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07001890
Zach Reizner2c770e62020-09-30 16:49:59 -07001891 inject_interrupt(&mut irq_chip, &vcpu, cpu_id);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001892 }
David Tolnay2bac1e72018-12-12 14:33:42 -08001893 })
1894 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001895}
1896
// Reads the whole contents of a file and converts its whitespace-separated fields into a Vec of
// i64s.
//
// Returns an error if the file cannot be opened or read, if its contents are not valid UTF-8
// (`InvalidData`), or if any of the fields fail to parse as an i64 (`InvalidData`). A file with
// no fields yields an empty Vec.
fn file_fields_to_i64<P: AsRef<Path>>(path: P) -> io::Result<Vec<i64>> {
    let mut file = File::open(path)?;

    // Read until EOF rather than issuing one fixed-size read: a single read into a small buffer
    // silently drops (or cuts in half) every field past the buffer size or past a short read.
    // `read_to_string` reports invalid UTF-8 as `InvalidData`.
    let mut content = String::new();
    file.read_to_string(&mut content)?;

    // `split_whitespace` already skips leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<i64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1916
1917// Reads the contents of a file and converts them into a u64, and if there
1918// are multiple fields it only returns the first one.
Charles William Dick0bf8a552019-10-29 15:36:01 +09001919fn file_to_i64<P: AsRef<Path>>(path: P) -> io::Result<i64> {
1920 file_fields_to_i64(path)?
Sonny Raod5f66082019-04-24 12:24:38 -07001921 .into_iter()
1922 .next()
1923 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001924}
1925
Steven Richmanf32d0b42020-06-20 21:45:32 -07001926fn create_kvm(mem: GuestMemory) -> base::Result<KvmVm> {
1927 let kvm = Kvm::new()?;
1928 let vm = KvmVm::new(&kvm, mem)?;
1929 Ok(vm)
1930}
1931
1932fn create_kvm_kernel_irq_chip(
1933 vm: &KvmVm,
1934 vcpu_count: usize,
1935 _ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07001936) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001937 let irq_chip = KvmKernelIrqChip::new(vm.try_clone()?, vcpu_count)?;
1938 Ok(irq_chip)
1939}
1940
1941#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1942fn create_kvm_split_irq_chip(
1943 vm: &KvmVm,
1944 vcpu_count: usize,
1945 ioapic_device_socket: VmIrqRequestSocket,
Zach Reizner304e7312020-09-29 16:00:24 -07001946) -> base::Result<impl IrqChipArch> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001947 let irq_chip = KvmSplitIrqChip::new(vm.try_clone()?, vcpu_count, ioapic_device_socket)?;
1948 Ok(irq_chip)
1949}
1950
Dylan Reid059a1882018-07-23 17:58:09 -07001951pub fn run_config(cfg: Config) -> Result<()> {
Steven Richmanf32d0b42020-06-20 21:45:32 -07001952 if cfg.split_irqchip {
1953 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1954 {
1955 unimplemented!("KVM split irqchip mode only supported on x86 processors")
1956 }
1957
1958 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1959 {
Zach Reizner304e7312020-09-29 16:00:24 -07001960 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm, create_kvm_split_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001961 }
1962 } else {
Zach Reizner304e7312020-09-29 16:00:24 -07001963 run_vm::<_, KvmVcpu, _, _, _>(cfg, create_kvm, create_kvm_kernel_irq_chip)
Steven Richmanf32d0b42020-06-20 21:45:32 -07001964 }
1965}
1966
Zach Reizner304e7312020-09-29 16:00:24 -07001967fn run_vm<V, Vcpu, I, FV, FI>(cfg: Config, create_vm: FV, create_irq_chip: FI) -> Result<()>
Steven Richmanf32d0b42020-06-20 21:45:32 -07001968where
1969 V: VmArch + 'static,
Zach Reizner304e7312020-09-29 16:00:24 -07001970 Vcpu: VcpuArch + 'static,
1971 I: IrqChipArch + 'static,
Steven Richmanf32d0b42020-06-20 21:45:32 -07001972 FV: FnOnce(GuestMemory) -> base::Result<V>,
1973 FI: FnOnce(
1974 &V,
1975 usize, // vcpu_count
1976 VmIrqRequestSocket, // ioapic_device_socket
1977 ) -> base::Result<I>,
1978{
Lepton Wu9105e9f2019-03-14 11:38:31 -07001979 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001980 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1981 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1982 // access to those files will not be possible.
1983 info!("crosvm entering multiprocess mode");
1984 }
1985
Jingkui Wang100e6e42019-03-08 20:41:57 -08001986 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001987 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001988 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1989 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1990 // quickly.
1991 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1992
David Tolnay2b089fc2019-03-04 15:33:22 -08001993 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1994 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001995 } else {
1996 None
1997 };
1998
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001999 let vm_image = match cfg.executable_path {
2000 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
2001 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
2002 ),
2003 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
2004 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
2005 ),
2006 _ => panic!("Did not receive a bios or kernel, should be impossible."),
2007 };
2008
Dylan Reid059a1882018-07-23 17:58:09 -07002009 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08002010 memory_size: cfg
2011 .memory
2012 .unwrap_or(256)
2013 .checked_mul(1024 * 1024)
2014 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07002015 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07002016 vcpu_affinity: cfg.vcpu_affinity.clone(),
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002017 no_smt: cfg.no_smt,
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07002018 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002019 android_fstab: cfg
2020 .android_fstab
2021 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08002022 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08002023 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09002024 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08002025 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07002026 extra_kernel_params: cfg.params.clone(),
2027 wayland_dmabuf: cfg.wayland_dmabuf,
Tomasz Jeznach42644642020-05-20 23:27:59 -07002028 acpi_sdts: cfg
2029 .acpi_tables
2030 .iter()
2031 .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
2032 .collect::<Result<Vec<SDT>>>()?,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002033 rt_cpus: cfg.rt_cpus.clone(),
Will Deacon7d2b8ac2020-10-06 18:51:12 +01002034 protected_vm: cfg.protected_vm,
Dylan Reid059a1882018-07-23 17:58:09 -07002035 };
2036
Zach Reiznera60744b2019-02-13 17:33:32 -08002037 let control_server_socket = match &cfg.socket_path {
2038 Some(path) => Some(UnlinkUnixSeqpacketListener(
2039 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
2040 )),
2041 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07002042 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002043
2044 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07002045 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07002046 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2047 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07002048 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07002049 let (balloon_host_socket, balloon_device_socket) =
Charles William Dick664cc3c2020-01-10 14:31:52 +09002050 msg_socket::pair::<BalloonControlCommand, BalloonControlResult>()
2051 .map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07002052
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002053 // Create one control socket per disk.
2054 let mut disk_device_sockets = Vec::new();
2055 let mut disk_host_sockets = Vec::new();
2056 let disk_count = cfg.disks.len();
2057 for _ in 0..disk_count {
2058 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07002059 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
2060 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002061 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07002062 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002063 }
2064
Daniel Verkampe1980a92020-02-07 11:00:55 -08002065 let mut pmem_device_sockets = Vec::new();
2066 let pmem_count = cfg.pmem_devices.len();
2067 for _ in 0..pmem_count {
2068 let (pmem_host_socket, pmem_device_socket) =
2069 msg_socket::pair::<VmMsyncResponse, VmMsyncRequest>().map_err(Error::CreateSocket)?;
2070 pmem_device_sockets.push(pmem_device_socket);
2071 control_sockets.push(TaggedControlSocket::VmMsync(pmem_host_socket));
2072 }
2073
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002074 let (gpu_host_socket, gpu_device_socket) =
2075 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
2076 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
2077
Zhuocheng Dingf2e90bf2019-12-02 15:50:20 +08002078 let (ioapic_host_socket, ioapic_device_socket) =
2079 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
2080 control_sockets.push(TaggedControlSocket::VmIrq(ioapic_host_socket));
2081
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002082 let map_request: Arc<Mutex<Option<ExternalMapping>>> = Arc::new(Mutex::new(None));
2083
Zach Reizner304e7312020-09-29 16:00:24 -07002084 let linux: RunnableLinuxVm<_, Vcpu, _> = Arch::build_vm(
Trent Begin17ccaad2019-04-17 13:51:25 -06002085 components,
Trent Begin17ccaad2019-04-17 13:51:25 -06002086 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08002087 simple_jail(&cfg, "serial")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07002088 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06002089 create_devices(
2090 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07002091 mem,
2092 vm,
2093 sys_allocator,
2094 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07002095 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002096 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07002097 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06002098 balloon_device_socket,
2099 &mut disk_device_sockets,
Daniel Verkampe1980a92020-02-07 11:00:55 -08002100 &mut pmem_device_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06002101 usb_provider,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002102 Arc::clone(&map_request),
Trent Begin17ccaad2019-04-17 13:51:25 -06002103 )
2104 },
Steven Richmanf32d0b42020-06-20 21:45:32 -07002105 create_vm,
2106 |vm, vcpu_count| create_irq_chip(vm, vcpu_count, ioapic_device_socket),
Trent Begin17ccaad2019-04-17 13:51:25 -06002107 )
David Tolnaybe034262019-03-04 17:48:36 -08002108 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08002109
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002110 run_control(
2111 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08002112 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002113 control_sockets,
2114 balloon_host_socket,
2115 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08002116 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002117 sigchld_fd,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002118 cfg.sandbox,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002119 Arc::clone(&map_request),
Daniel Verkamp92f73d72018-12-04 13:17:46 -08002120 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07002121}
2122
Zach Reizner304e7312020-09-29 16:00:24 -07002123fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static, I: IrqChipArch + 'static>(
2124 mut linux: RunnableLinuxVm<V, Vcpu, I>,
Zach Reiznera60744b2019-02-13 17:33:32 -08002125 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07002126 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07002127 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07002128 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08002129 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07002130 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07002131 sandbox: bool,
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002132 map_request: Arc<Mutex<Option<ExternalMapping>>>,
Zach Reizner55a9e502018-10-03 10:22:32 -07002133) -> Result<()> {
David Tolnay5bbbf612018-12-01 17:49:30 -08002134 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002135
Zach Reizner5bed0d22018-03-28 02:31:11 -07002136 #[derive(PollToken)]
2137 enum Token {
2138 Exit,
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002139 Suspend,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002140 ChildSignal,
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002141 IrqFd { index: IrqEventIndex },
Charles William Dick0bf8a552019-10-29 15:36:01 +09002142 BalanceMemory,
2143 BalloonResult,
Zach Reiznera60744b2019-02-13 17:33:32 -08002144 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07002145 VmControl { index: usize },
2146 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002147
Zach Reizner19ad1f32019-12-12 18:58:50 -08002148 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002149 .set_raw_mode()
2150 .expect("failed to set terminal raw mode");
2151
Michael Hoylee392c462020-10-07 03:29:24 -07002152 let wait_ctx = WaitContext::build_with(&[
Zach Reiznerb2110be2019-07-23 15:55:03 -07002153 (&linux.exit_evt, Token::Exit),
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002154 (&linux.suspend_evt, Token::Suspend),
Zach Reiznerb2110be2019-07-23 15:55:03 -07002155 (&sigchld_fd, Token::ChildSignal),
2156 ])
Michael Hoylee392c462020-10-07 03:29:24 -07002157 .map_err(Error::WaitContextAdd)?;
Zach Reiznerb2110be2019-07-23 15:55:03 -07002158
Zach Reiznera60744b2019-02-13 17:33:32 -08002159 if let Some(socket_server) = &control_server_socket {
Michael Hoylee392c462020-10-07 03:29:24 -07002160 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002161 .add(socket_server, Token::VmControlServer)
Michael Hoylee392c462020-10-07 03:29:24 -07002162 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002163 }
Dylan Reid059a1882018-07-23 17:58:09 -07002164 for (index, socket) in control_sockets.iter().enumerate() {
Michael Hoylee392c462020-10-07 03:29:24 -07002165 wait_ctx
Zach Reizner55a9e502018-10-03 10:22:32 -07002166 .add(socket.as_ref(), Token::VmControl { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002167 .map_err(Error::WaitContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002168 }
2169
Steven Richmanf32d0b42020-06-20 21:45:32 -07002170 let events = linux
2171 .irq_chip
2172 .irq_event_tokens()
Michael Hoylee392c462020-10-07 03:29:24 -07002173 .map_err(Error::WaitContextAdd)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002174
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002175 for (index, _gsi, evt) in events {
Michael Hoylee392c462020-10-07 03:29:24 -07002176 wait_ctx
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002177 .add(&evt, Token::IrqFd { index })
Michael Hoylee392c462020-10-07 03:29:24 -07002178 .map_err(Error::WaitContextAdd)?;
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002179 }
2180
Charles William Dick0bf8a552019-10-29 15:36:01 +09002181 // Balance available memory between guest and host every second.
Michael Hoyle08d86a42020-08-19 14:45:21 -07002182 let mut balancemem_timer = Timer::new().map_err(Error::CreateTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002183 if Path::new(LOWMEM_AVAILABLE).exists() {
2184 // Create timer request balloon stats every 1s.
Michael Hoylee392c462020-10-07 03:29:24 -07002185 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002186 .add(&balancemem_timer, Token::BalanceMemory)
Michael Hoylee392c462020-10-07 03:29:24 -07002187 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002188 let balancemem_dur = Duration::from_secs(1);
2189 let balancemem_int = Duration::from_secs(1);
2190 balancemem_timer
2191 .reset(balancemem_dur, Some(balancemem_int))
Michael Hoyle08d86a42020-08-19 14:45:21 -07002192 .map_err(Error::ResetTimer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002193
2194 // Listen for balloon statistics from the guest so we can balance.
Michael Hoylee392c462020-10-07 03:29:24 -07002195 wait_ctx
Charles William Dick0bf8a552019-10-29 15:36:01 +09002196 .add(&balloon_host_socket, Token::BalloonResult)
Michael Hoylee392c462020-10-07 03:29:24 -07002197 .map_err(Error::WaitContextAdd)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002198 } else {
2199 warn!("Unable to open low mem available, maybe not a chrome os kernel");
2200 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002201
Lepton Wu20333e42019-03-14 10:48:03 -07002202 if sandbox {
2203 // Before starting VCPUs, in case we started with some capabilities, drop them all.
2204 drop_capabilities().map_err(Error::DropCapabilities)?;
2205 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08002206
Steven Richmanf32d0b42020-06-20 21:45:32 -07002207 let mut vcpu_handles = Vec::with_capacity(linux.vcpu_count);
2208 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpu_count + 1));
Steven Richmanf32d0b42020-06-20 21:45:32 -07002209 let use_hypervisor_signals = !linux
2210 .vm
2211 .get_hypervisor()
2212 .check_capability(&HypervisorCap::ImmediateExit);
Zach Reizner304e7312020-09-29 16:00:24 -07002213 setup_vcpu_signal_handler::<Vcpu>(use_hypervisor_signals)?;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002214
Zach Reizner304e7312020-09-29 16:00:24 -07002215 let vcpus: Vec<Option<_>> = match linux.vcpus.take() {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002216 Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
2217 None => iter::repeat_with(|| None).take(linux.vcpu_count).collect(),
2218 };
Daniel Verkamp94c35272019-09-12 13:31:30 -07002219 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Dylan Reidb0492662019-05-17 14:50:13 -07002220 let (to_vcpu_channel, from_main_channel) = mpsc::channel();
Daniel Verkampc677fb42020-09-08 13:47:49 -07002221 let vcpu_affinity = match linux.vcpu_affinity.clone() {
2222 Some(VcpuAffinity::Global(v)) => v,
2223 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&cpu_id).unwrap_or_default(),
2224 None => Default::default(),
2225 };
Zach Reizner55a9e502018-10-03 10:22:32 -07002226 let handle = run_vcpu(
Steven Richmanf32d0b42020-06-20 21:45:32 -07002227 cpu_id,
Zach Reizner55a9e502018-10-03 10:22:32 -07002228 vcpu,
Michael Hoyle685316f2020-09-16 15:29:20 -07002229 linux.vm.try_clone().map_err(Error::CloneEvent)?,
2230 linux.irq_chip.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002231 linux.vcpu_count,
Kansho Nishidaab205af2020-08-13 18:17:50 +09002232 linux.rt_cpus.contains(&cpu_id),
Daniel Verkampc677fb42020-09-08 13:47:49 -07002233 vcpu_affinity,
Suleiman Souhlal015c3c12020-10-07 14:15:41 +09002234 linux.no_smt,
Zach Reizner55a9e502018-10-03 10:22:32 -07002235 vcpu_thread_barrier.clone(),
Steven Richmanf32d0b42020-06-20 21:45:32 -07002236 linux.has_bios,
Zach Reizner55a9e502018-10-03 10:22:32 -07002237 linux.io_bus.clone(),
2238 linux.mmio_bus.clone(),
Michael Hoyle685316f2020-09-16 15:29:20 -07002239 linux.exit_evt.try_clone().map_err(Error::CloneEvent)?,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002240 linux.vm.check_capability(VmCap::PvClockSuspend),
Dylan Reidb0492662019-05-17 14:50:13 -07002241 from_main_channel,
Steven Richmanf32d0b42020-06-20 21:45:32 -07002242 use_hypervisor_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07002243 )?;
Dylan Reidb0492662019-05-17 14:50:13 -07002244 vcpu_handles.push((handle, to_vcpu_channel));
Dylan Reid059a1882018-07-23 17:58:09 -07002245 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002246
Dylan Reid059a1882018-07-23 17:58:09 -07002247 vcpu_thread_barrier.wait();
2248
Michael Hoylee392c462020-10-07 03:29:24 -07002249 'wait: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002250 let events = {
Michael Hoylee392c462020-10-07 03:29:24 -07002251 match wait_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002252 Ok(v) => v,
2253 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08002254 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08002255 break;
2256 }
2257 }
2258 };
Zach Reiznera60744b2019-02-13 17:33:32 -08002259
Steven Richmanf32d0b42020-06-20 21:45:32 -07002260 if let Err(e) = linux.irq_chip.process_delayed_irq_events() {
2261 warn!("can't deliver delayed irqs: {}", e);
2262 }
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002263
Zach Reiznera60744b2019-02-13 17:33:32 -08002264 let mut vm_control_indices_to_remove = Vec::new();
Michael Hoylee392c462020-10-07 03:29:24 -07002265 for event in events.iter().filter(|e| e.is_readable) {
2266 match event.token {
Zach Reizner5bed0d22018-03-28 02:31:11 -07002267 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002268 info!("vcpu requested shutdown");
Michael Hoylee392c462020-10-07 03:29:24 -07002269 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002270 }
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002271 Token::Suspend => {
2272 info!("VM requested suspend");
2273 linux.suspend_evt.read().unwrap();
Dylan Reidb0492662019-05-17 14:50:13 -07002274 for (handle, channel) in &vcpu_handles {
2275 if let Err(e) = channel.send(VmRunMode::Suspending) {
2276 error!("failed to send VmRunMode: {}", e);
2277 }
2278
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002279 let _ = handle.kill(SIGRTMIN() + 0);
2280 }
2281 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002282 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002283 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08002284 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08002285 let pid = siginfo.ssi_pid;
2286 let pid_label = match linux.pid_debug_label_map.get(&pid) {
2287 Some(label) => format!("{} (pid {})", label, pid),
2288 None => format!("pid {}", pid),
2289 };
David Tolnayf5032762018-12-03 10:46:45 -08002290 error!(
2291 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08002292 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08002293 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08002294 }
Michael Hoylee392c462020-10-07 03:29:24 -07002295 break 'wait;
Zach Reizner39aa26b2017-12-12 18:03:23 -08002296 }
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002297 Token::IrqFd { index } => {
2298 if let Err(e) = linux.irq_chip.service_irq_event(index) {
2299 error!("failed to signal irq {}: {}", index, e);
Zhuocheng Dingb9f4c9b2019-12-02 15:50:28 +08002300 }
2301 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002302 Token::BalanceMemory => {
Michael Hoyle08d86a42020-08-19 14:45:21 -07002303 balancemem_timer.wait().map_err(Error::Timer)?;
Charles William Dick0bf8a552019-10-29 15:36:01 +09002304 let command = BalloonControlCommand::Stats {};
2305 if let Err(e) = balloon_host_socket.send(&command) {
2306 warn!("failed to send stats request to balloon device: {}", e);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002307 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002308 }
Charles William Dick0bf8a552019-10-29 15:36:01 +09002309 Token::BalloonResult => {
2310 match balloon_host_socket.recv() {
2311 Ok(BalloonControlResult::Stats {
2312 stats,
2313 balloon_actual: balloon_actual_u,
2314 }) => {
2315 // Available memory is reported in MB, and we need bytes.
2316 let host_available = file_to_i64(LOWMEM_AVAILABLE)
2317 .map_err(Error::ReadMemAvailable)?
2318 << 20;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002319 let guest_free_u = if let Some(free) = stats.free_memory {
2320 free
Charles William Dick0bf8a552019-10-29 15:36:01 +09002321 } else {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002322 warn!("guest free_memory stat is missing");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002323 continue;
2324 };
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002325 let guest_cached_u = if let Some(cached) = stats.disk_caches {
2326 cached
2327 } else {
2328 warn!("guest disk_caches stat is missing");
2329 continue;
2330 };
2331 if guest_free_u > i64::max_value() as u64 {
2332 warn!("guest free memory is too large");
2333 continue;
2334 }
2335 if guest_cached_u > i64::max_value() as u64 {
2336 warn!("guest cached memory is too large");
Charles William Dick0bf8a552019-10-29 15:36:01 +09002337 continue;
2338 }
2339 if balloon_actual_u > i64::max_value() as u64 {
2340 warn!("actual balloon size is too large");
2341 continue;
2342 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002343 // Tell the guest to change the balloon size if the target balloon size
2344 // is more than 5% different from the current balloon size.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002345 const RESIZE_PERCENT: i64 = 5;
2346 let balloon_actual = balloon_actual_u as i64;
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002347 let guest_free = guest_free_u as i64;
2348 let guest_cached = guest_cached_u as i64;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002349 // Compute how much memory the guest should have available after we
2350 // rebalance.
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002351 let guest_available_target = host_available;
2352 let guest_available_delta =
2353 guest_available_target - guest_free - guest_cached;
Steven Richmanf32d0b42020-06-20 21:45:32 -07002354 // How much do we have to change the balloon to balance.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002355 let balloon_target = max(balloon_actual - guest_available_delta, 0);
Steven Richmanf32d0b42020-06-20 21:45:32 -07002356 // Compute the change in balloon size in percent. If the balloon size
2357 // is 0, use 1 so we don't overflow from the infinity % increase.
Charles William Dick0bf8a552019-10-29 15:36:01 +09002358 let balloon_change_percent = (balloon_actual - balloon_target).abs()
2359 * 100
2360 / max(balloon_actual, 1);
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002361
Charles William Dick0bf8a552019-10-29 15:36:01 +09002362 if balloon_change_percent >= RESIZE_PERCENT {
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002363 info!("resizing balloon: host avail {}, guest free {} cached {} (target {}), balloon actual {} (target {})",
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002364 host_available,
Suleiman Souhlal14fa6bd2020-08-18 13:01:15 +09002365 guest_free,
2366 guest_cached,
Daniel Verkamp1cd80992020-07-27 12:41:50 -07002367 guest_available_target,
2368 balloon_actual,
2369 balloon_target,
2370 );
Charles William Dick0bf8a552019-10-29 15:36:01 +09002371 let command = BalloonControlCommand::Adjust {
2372 num_bytes: balloon_target as u64,
2373 };
2374 if let Err(e) = balloon_host_socket.send(&command) {
2375 warn!("failed to send memory value to balloon device: {}", e);
2376 }
2377 }
2378 }
2379 Err(e) => {
2380 error!("failed to recv BalloonControlResult: {}", e);
2381 }
2382 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07002383 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002384 Token::VmControlServer => {
2385 if let Some(socket_server) = &control_server_socket {
2386 match socket_server.accept() {
2387 Ok(socket) => {
Michael Hoylee392c462020-10-07 03:29:24 -07002388 wait_ctx
Zach Reiznera60744b2019-02-13 17:33:32 -08002389 .add(
2390 &socket,
2391 Token::VmControl {
2392 index: control_sockets.len(),
2393 },
2394 )
Michael Hoylee392c462020-10-07 03:29:24 -07002395 .map_err(Error::WaitContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002396 control_sockets
2397 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08002398 }
2399 Err(e) => error!("failed to accept socket: {}", e),
2400 }
2401 }
2402 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002403 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08002404 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002405 match socket {
2406 TaggedControlSocket::Vm(socket) => match socket.recv() {
2407 Ok(request) => {
2408 let mut run_mode_opt = None;
2409 let response = request.execute(
2410 &mut run_mode_opt,
2411 &balloon_host_socket,
2412 disk_host_sockets,
2413 &usb_control_socket,
2414 );
2415 if let Err(e) = socket.send(&response) {
2416 error!("failed to send VmResponse: {}", e);
2417 }
2418 if let Some(run_mode) = run_mode_opt {
2419 info!("control socket changed run mode to {}", run_mode);
2420 match run_mode {
2421 VmRunMode::Exiting => {
Michael Hoylee392c462020-10-07 03:29:24 -07002422 break 'wait;
Jakub Starond99cd0a2019-04-11 14:09:39 -07002423 }
Dylan Reidb0492662019-05-17 14:50:13 -07002424 VmRunMode::Suspending | VmRunMode::Running => {
2425 if run_mode == VmRunMode::Suspending {
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002426 linux.io_bus.notify_resume();
2427 }
Dylan Reidb0492662019-05-17 14:50:13 -07002428 for (handle, channel) in &vcpu_handles {
2429 if let Err(e) = channel.send(VmRunMode::Running)
2430 {
2431 error!("failed to send VmRunMode: {}", e);
2432 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002433 let _ = handle.kill(SIGRTMIN() + 0);
2434 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08002435 }
2436 }
2437 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002438 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002439 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002440 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002441 vm_control_indices_to_remove.push(index);
2442 } else {
2443 error!("failed to recv VmRequest: {}", e);
2444 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002445 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07002446 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07002447 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002448 Ok(request) => {
Lingfeng Yangd6ac1ab2020-01-31 13:55:35 -08002449 let response = request.execute(
2450 &mut linux.vm,
2451 &mut linux.resources,
2452 Arc::clone(&map_request),
2453 );
Jakub Starond99cd0a2019-04-11 14:09:39 -07002454 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002455 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002456 }
2457 }
2458 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002459 if let MsgError::RecvZero = e {
Jakub Starond99cd0a2019-04-11 14:09:39 -07002460 vm_control_indices_to_remove.push(index);
2461 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07002462 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07002463 }
2464 }
2465 },
Xiong Zhang2515b752019-09-19 10:29:02 +08002466 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
2467 Ok(request) => {
Steven Richmanf32d0b42020-06-20 21:45:32 -07002468 let response = {
2469 let irq_chip = &mut linux.irq_chip;
2470 request.execute(
2471 |setup| match setup {
2472 IrqSetup::Event(irq, ev) => {
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002473 if let Some(event_index) = irq_chip
2474 .register_irq_event(irq, ev, None)?
2475 {
2476 match wait_ctx.add(
2477 ev,
2478 Token::IrqFd {
2479 index: event_index
2480 },
2481 ) {
2482 Err(e) => {
2483 warn!("failed to add IrqFd to poll context: {}", e);
2484 Err(e)
2485 },
2486 Ok(_) => {
2487 Ok(())
2488 }
2489 }
2490 } else {
2491 Ok(())
2492 }
Steven Richmanf32d0b42020-06-20 21:45:32 -07002493 }
2494 IrqSetup::Route(route) => irq_chip.route_irq(route),
2495 },
2496 &mut linux.resources,
2497 )
2498 };
Xiong Zhang2515b752019-09-19 10:29:02 +08002499 if let Err(e) = socket.send(&response) {
2500 error!("failed to send VmIrqResponse: {}", e);
2501 }
2502 }
2503 Err(e) => {
Zach Reizner297ae772020-02-21 14:45:14 -08002504 if let MsgError::RecvZero = e {
Xiong Zhang2515b752019-09-19 10:29:02 +08002505 vm_control_indices_to_remove.push(index);
2506 } else {
2507 error!("failed to recv VmIrqRequest: {}", e);
2508 }
2509 }
2510 },
Daniel Verkampe1980a92020-02-07 11:00:55 -08002511 TaggedControlSocket::VmMsync(socket) => match socket.recv() {
2512 Ok(request) => {
2513 let response = request.execute(&mut linux.vm);
2514 if let Err(e) = socket.send(&response) {
2515 error!("failed to send VmMsyncResponse: {}", e);
2516 }
2517 }
2518 Err(e) => {
2519 if let MsgError::BadRecvSize { actual: 0, .. } = e {
2520 vm_control_indices_to_remove.push(index);
2521 } else {
2522 error!("failed to recv VmMsyncRequest: {}", e);
2523 }
2524 }
2525 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08002526 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002527 }
2528 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002529 }
2530 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002531
Michael Hoylee392c462020-10-07 03:29:24 -07002532 for event in events.iter().filter(|e| e.is_hungup) {
2533 match event.token {
Zach Reiznera60744b2019-02-13 17:33:32 -08002534 Token::Exit => {}
Chuanxiao Dong546f01c2020-02-12 21:58:47 +08002535 Token::Suspend => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002536 Token::ChildSignal => {}
Colin Downs-Razoukbd532762020-09-08 15:49:35 -07002537 Token::IrqFd { index: _ } => {}
Charles William Dick0bf8a552019-10-29 15:36:01 +09002538 Token::BalanceMemory => {}
2539 Token::BalloonResult => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08002540 Token::VmControlServer => {}
2541 Token::VmControl { index } => {
2542 // It's possible more data is readable and buffered while the socket is hungup,
2543 // so don't delete the socket from the poll context until we're sure all the
2544 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07002545 match control_sockets
2546 .get(index)
2547 .map(|s| s.as_ref().get_readable_bytes())
2548 {
Zach Reiznera60744b2019-02-13 17:33:32 -08002549 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
2550 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
2551 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07002552 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07002553 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002554 }
2555 }
Zach Reiznera60744b2019-02-13 17:33:32 -08002556
2557 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08002558 // preserves correct indexes as each element is removed.
Daniel Verkamp8c2f0002020-08-31 15:13:35 -07002559 vm_control_indices_to_remove.sort_unstable_by_key(|&k| Reverse(k));
Zach Reiznera60744b2019-02-13 17:33:32 -08002560 vm_control_indices_to_remove.dedup();
2561 for index in vm_control_indices_to_remove {
Michael Hoylee392c462020-10-07 03:29:24 -07002562 // Delete the socket from the `wait_ctx` synchronously. Otherwise, the kernel will do
2563 // this automatically when the FD inserted into the `wait_ctx` is closed after this
Zide Chen89584072019-11-14 10:33:51 -08002564 // if-block, but this removal can be deferred unpredictably. In some instances where the
Michael Hoylee392c462020-10-07 03:29:24 -07002565 // system is under heavy load, we can even get events returned by `wait_ctx` for an FD
Zide Chen89584072019-11-14 10:33:51 -08002566 // that has already been closed. Because the token associated with that spurious event
2567 // now belongs to a different socket, the control loop will start to interact with
2568 // sockets that might not be ready to use. This can cause incorrect hangup detection or
2569 // blocking on a socket that will never be ready. See also: crbug.com/1019986
2570 if let Some(socket) = control_sockets.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002571 wait_ctx.delete(socket).map_err(Error::WaitContextDelete)?;
Zide Chen89584072019-11-14 10:33:51 -08002572 }
2573
2574 // This line implicitly drops the socket at `index` when it gets returned by
2575 // `swap_remove`. After this line, the socket at `index` is not the one from
2576 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
Michael Hoylee392c462020-10-07 03:29:24 -07002577 // use `wait_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08002578 control_sockets.swap_remove(index);
2579 if let Some(socket) = control_sockets.get(index) {
Michael Hoylee392c462020-10-07 03:29:24 -07002580 wait_ctx
2581 .modify(socket, EventType::Read, Token::VmControl { index })
2582 .map_err(Error::WaitContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08002583 }
2584 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08002585 }
2586
Dylan Reidb0492662019-05-17 14:50:13 -07002587 for (handle, channel) in vcpu_handles {
2588 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
2589 if let Err(e) = channel.send(VmRunMode::Exiting) {
2590 error!("failed to send VmRunMode: {}", e);
2591 }
Dmitry Torokhovcd405332018-02-16 16:25:54 -08002592 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08002593 Ok(_) => {
2594 if let Err(e) = handle.join() {
2595 error!("failed to join vcpu thread: {:?}", e);
2596 }
2597 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08002598 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08002599 }
2600 }
2601
Daniel Verkamp94c35272019-09-12 13:31:30 -07002602 // Explicitly drop the VM structure here to allow the devices to clean up before the
2603 // control sockets are closed when this function exits.
2604 mem::drop(linux);
2605
Zach Reizner19ad1f32019-12-12 18:58:50 -08002606 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08002607 .set_canon_mode()
2608 .expect("failed to restore canonical mode for terminal");
2609
2610 Ok(())
2611}