blob: a26e7bba8c7ec625a8d994d3307b22dc7d083ef2 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07006use std::cmp::min;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
David Tolnayfdac5ed2019-03-08 16:56:14 -08008use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -07009use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080010use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070011use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070012use std::io::{self, stdin, Read};
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070015#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070016use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090017use std::num::ParseIntError;
Jakub Starond99cd0a2019-04-11 14:09:39 -070018use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080019use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080020use std::path::{Path, PathBuf};
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +090021use std::ptr;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070022use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070023use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080024use std::thread;
25use std::thread::JoinHandle;
Daniel Prilik22006042019-01-14 14:19:04 -080026use std::time::{Duration, SystemTime, UNIX_EPOCH};
Zach Reizner39aa26b2017-12-12 18:03:23 -080027
David Tolnay41a6f842019-03-01 16:18:44 -080028use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080029
Dylan Reid3082e8e2019-01-07 10:33:48 -080030use audio_streams::DummyStreamSource;
Zach Reizner65b98f12019-11-22 17:34:58 -080031#[cfg(feature = "gpu")]
32use devices::virtio::EventDevice;
David Tolnay2b089fc2019-03-04 15:33:22 -080033use devices::virtio::{self, VirtioDevice};
Xiong Zhang17b0daf2019-04-23 17:14:50 +080034use devices::{
35 self, HostBackendDeviceProvider, PciDevice, VfioDevice, VfioPciDevice, VirtioPciDevice,
36 XhciController,
37};
Zach Reizner39aa26b2017-12-12 18:03:23 -080038use io_jail::{self, Minijail};
Zach Reizner39aa26b2017-12-12 18:03:23 -080039use kvm::*;
paulhsiaf052cfe2019-01-22 15:22:25 +080040use libcras::CrasClient;
Zach Reiznera60744b2019-02-13 17:33:32 -080041use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080042use net_util::{Error as NetError, MacAddress, Tap};
Daniel Prilik22006042019-01-14 14:19:04 -080043use rand_ish::SimpleRng;
David Tolnay3df35522019-03-11 12:36:30 -070044use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080045use resources::{Alloc, MmioType, SystemAllocator};
Zach Reizner6a8fdd92019-01-16 14:38:41 -080046use sync::{Condvar, Mutex};
Jakub Starond99cd0a2019-04-11 14:09:39 -070047use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Jakub Starona3411ea2019-04-24 10:55:25 -070048
Zach Reiznera60744b2019-02-13 17:33:32 -080049use sys_util::{
David Tolnay633426a2019-04-12 12:18:35 -070050 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060051 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
52 set_cpu_affinity, validate_raw_fd, warn, EventFd, FlockOperation, GuestAddress, GuestMemory,
Zach Reizner95885312020-01-29 18:06:01 -080053 Killable, MemoryMappingArena, PollContext, PollToken, Protection, ScopedEvent, SignalFd,
54 Terminal, TimerFd, WatchingEvents, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080055};
Jason D. Clinton865323d2017-09-27 22:04:03 -060056use vhost;
Jakub Starone7c59052019-04-09 12:31:14 -070057use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070058 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -070059 DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, DiskControlResult,
Xiong Zhanga5d248c2019-09-17 14:17:19 -070060 UsbControlSocket, VmControlResponseSocket, VmIrqRequest, VmIrqResponse, VmIrqResponseSocket,
61 VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse,
62 VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070063};
Zach Reizner39aa26b2017-12-12 18:03:23 -080064
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090065use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Zach Reizner39aa26b2017-12-12 18:03:23 -080066
Cody Schuffelen6d1ab502019-05-21 12:12:38 -070067use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage};
Sonny Raoed517d12018-02-13 22:09:43 -080068
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080069#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
70use aarch64::AArch64 as Arch;
Zach Reizner55a9e502018-10-03 10:22:32 -070071#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
72use x86_64::X8664arch as Arch;
Zach Reizner39aa26b2017-12-12 18:03:23 -080073
David Tolnay3df35522019-03-11 12:36:30 -070074#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070075#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080076pub enum Error {
Lepton Wu60893882018-11-21 11:06:18 -080077 AddGpuDeviceMemory(sys_util::Error),
Jakub Starona3411ea2019-04-24 10:55:25 -070078 AddPmemDeviceMemory(sys_util::Error),
Lepton Wu60893882018-11-21 11:06:18 -080079 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070080 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080081 BalloonDeviceNew(virtio::BalloonError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080082 BlockDeviceNew(sys_util::Error),
Mark Ryan6ed5aea2018-04-20 13:52:35 +010083 BlockSignal(sys_util::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080084 BuildVm(<Arch as LinuxArch>::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080085 ChownTpmStorage(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080086 CloneEventFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080087 CreateCrasClient(libcras::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -070088 CreateDiskError(disk::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080089 CreateEventFd(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070090 CreatePollContext(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080091 CreateSignalFd(sys_util::SignalFdError),
92 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -080093 CreateTapDevice(NetError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -070094 CreateTimerFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080095 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -080096 CreateUsbProvider(devices::usb::host_backend::error::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +080097 CreateVfioDevice(devices::vfio::VfioError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080098 DeviceJail(io_jail::Error),
99 DevicePivotRoot(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800100 Disk(io::Error),
Stephen Barberc79de2d2018-02-21 14:17:27 -0800101 DiskImageLock(sys_util::Error),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800102 DropCapabilities(sys_util::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900103 FsDeviceNew(virtio::fs::Error),
104 GetMaxOpenFiles(io::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800105 InputDeviceNew(virtio::InputError),
106 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700107 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700108 InvalidWaylandPath,
David Tolnayfd0971d2019-03-04 17:15:57 -0800109 IoJail(io_jail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800110 LoadKernel(Box<dyn StdError>),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800111 MemoryTooLarge,
David Tolnay2b089fc2019-03-04 15:33:22 -0800112 NetDeviceNew(virtio::NetError),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800113 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700114 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800115 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800116 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800117 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800118 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900119 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800120 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700121 PmemDeviceImageTooBig,
122 PmemDeviceNew(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -0700123 PollContextAdd(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700124 PollContextDelete(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700125 ReadLowmemAvailable(io::Error),
126 ReadLowmemMargin(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700127 RegisterBalloon(arch::DeviceRegistrationError),
128 RegisterBlock(arch::DeviceRegistrationError),
129 RegisterGpu(arch::DeviceRegistrationError),
130 RegisterNet(arch::DeviceRegistrationError),
131 RegisterP9(arch::DeviceRegistrationError),
132 RegisterRng(arch::DeviceRegistrationError),
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100133 RegisterSignalHandler(sys_util::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700134 RegisterWayland(arch::DeviceRegistrationError),
Lepton Wu60893882018-11-21 11:06:18 -0800135 ReserveGpuMemory(sys_util::MmapError),
136 ReserveMemory(sys_util::Error),
Jakub Starona3411ea2019-04-24 10:55:25 -0700137 ReservePmemMemory(sys_util::MmapError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700138 ResetTimerFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800139 RngDeviceNew(virtio::RngError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800140 SettingGidMap(io_jail::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900141 SettingMaxOpenFiles(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800142 SettingUidMap(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800143 SignalFd(sys_util::SignalFdError),
144 SpawnVcpu(io::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700145 TimerFd(sys_util::Error),
Chirantan Ekbote2d292332018-11-16 11:35:24 -0800146 ValidateRawFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800147 VhostNetDeviceNew(virtio::vhost::Error),
148 VhostVsockDeviceNew(virtio::vhost::Error),
Daniel Verkamp56f283b2018-10-05 11:40:59 -0700149 VirtioPciDev(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800150 WaylandDeviceNew(sys_util::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800151}
152
David Tolnayc69f9752019-03-01 18:07:56 -0800153impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700154 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800155 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800156 use self::Error::*;
157
David Tolnay3df35522019-03-11 12:36:30 -0700158 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800159 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800160 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700161 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800162 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700163 AllocatePmemDeviceAddress(e) => {
164 write!(f, "failed to allocate memory for pmem device: {}", e)
165 }
David Tolnayc69f9752019-03-01 18:07:56 -0800166 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
167 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
168 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800169 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800170 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800171 CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800172 CreateCrasClient(e) => write!(f, "failed to create cras client: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700173 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800174 CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
175 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
176 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
177 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
178 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
179 CreateTimerFd(e) => write!(f, "failed to create timerfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800180 CreateTpmStorage(p, e) => {
181 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
182 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800183 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800184 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800185 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
186 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
187 Disk(e) => write!(f, "failed to load disk image: {}", e),
188 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800189 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900190 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
191 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700192 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
193 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800194 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
195 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800196 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800197 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
Daniel Verkamp6a847062019-11-26 13:16:35 -0800198 MemoryTooLarge => write!(f, "requested memory size too large"),
David Tolnayc69f9752019-03-01 18:07:56 -0800199 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800200 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800201 f,
202 "failed to open android fstab file {}: {}",
203 p.display(),
204 e
205 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700206 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700207 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
208 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800209 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800210 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900211 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800212 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700213 PmemDeviceImageTooBig => {
214 write!(f, "failed to create pmem device: pmem device image too big")
215 }
216 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800217 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
218 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800219 ReadLowmemAvailable(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700220 f,
221 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
222 e
223 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800224 ReadLowmemMargin(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700225 f,
226 "failed to read /sys/kernel/mm/chromeos-low_mem/margin: {}",
227 e
228 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800229 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
230 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
231 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
232 RegisterNet(e) => write!(f, "error registering net device: {}", e),
233 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
234 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
235 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
236 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800237 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
238 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700239 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800240 ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
241 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800242 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900243 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800244 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
245 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
246 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
247 TimerFd(e) => write!(f, "failed to read timer fd: {}", e),
248 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
249 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
250 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
251 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
252 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800253 }
254 }
255}
256
David Tolnayfd0971d2019-03-04 17:15:57 -0800257impl From<io_jail::Error> for Error {
258 fn from(err: io_jail::Error) -> Self {
259 Error::IoJail(err)
260 }
261}
262
David Tolnayc69f9752019-03-01 18:07:56 -0800263impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700264
Zach Reizner39aa26b2017-12-12 18:03:23 -0800265type Result<T> = std::result::Result<T, Error>;
266
Jakub Starond99cd0a2019-04-11 14:09:39 -0700267enum TaggedControlSocket {
268 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700269 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800270 VmIrq(VmIrqResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700271}
272
273impl AsRef<UnixSeqpacket> for TaggedControlSocket {
274 fn as_ref(&self) -> &UnixSeqpacket {
275 use self::TaggedControlSocket::*;
276 match &self {
Chirantan Ekbote50582532020-01-16 16:49:14 +0900277 Vm(ref socket) => socket.as_ref(),
278 VmMemory(ref socket) => socket.as_ref(),
279 VmIrq(ref socket) => socket.as_ref(),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700280 }
281 }
282}
283
284impl AsRawFd for TaggedControlSocket {
285 fn as_raw_fd(&self) -> RawFd {
286 self.as_ref().as_raw_fd()
287 }
288}
289
Chirantan Ekbote220605a2019-11-14 18:36:06 +0900290fn get_max_open_files() -> Result<libc::rlim64_t> {
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900291 let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900292
Chirantan Ekboteaa77ea42019-12-09 14:58:54 +0900293 // Safe because this will only modify `buf` and we check the return value.
294 let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
295 if res == 0 {
296 // Safe because the kernel guarantees that the struct is fully initialized.
297 let limit = unsafe { buf.assume_init() };
298 Ok(limit.rlim_max)
299 } else {
300 Err(Error::GetMaxOpenFiles(io::Error::last_os_error()))
301 }
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900302}
303
Zach Reizner44863792019-06-26 14:22:08 -0700304fn create_base_minijail(
305 root: &Path,
306 log_failures: bool,
307 seccomp_policy: &Path,
308) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800309 // All child jails run in a new user namespace without any users mapped,
310 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800311 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800312 j.namespace_pids();
313 j.namespace_user();
314 j.namespace_user_disable_setgroups();
315 // Don't need any capabilities.
316 j.use_caps(0);
317 // Create a new mount namespace with an empty root FS.
318 j.namespace_vfs();
David Tolnay5bbbf612018-12-01 17:49:30 -0800319 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800320 // Run in an empty network namespace.
321 j.namespace_net();
Chirantan Ekbote1a5fe952019-11-27 17:38:54 +0900322 // Most devices don't need to open many fds.
Allen Webbd4afd702019-12-20 16:07:30 -0800323 j.set_rlimit(libc::RLIMIT_NOFILE as i32, 1024, 1024)
Chirantan Ekbote1a5fe952019-11-27 17:38:54 +0900324 .map_err(Error::SettingMaxOpenFiles)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800325 // Apply the block device seccomp policy.
326 j.no_new_privs();
Matt Delco45caf912019-11-13 08:11:09 -0800327
328 // By default we'll prioritize using the pre-compiled .bpf over the .policy
329 // file (the .bpf is expected to be compiled using "trap" as the failure
330 // behavior instead of the default "kill" behavior).
331 // Refer to the code comment for the "seccomp-log-failures"
332 // command-line parameter for an explanation about why the |log_failures|
333 // flag forces the use of .policy files (and the build-time alternative to
334 // this run-time flag).
335 let bpf_policy_file = seccomp_policy.with_extension("bpf");
336 if bpf_policy_file.exists() && !log_failures {
337 j.parse_seccomp_program(&bpf_policy_file)
338 .map_err(Error::DeviceJail)?;
339 } else {
340 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
341 // which will correctly kill the entire device process if a worker
342 // thread commits a seccomp violation.
343 j.set_seccomp_filter_tsync();
344 if log_failures {
345 j.log_seccomp_filter_failures();
346 }
347 j.parse_seccomp_filters(&seccomp_policy.with_extension("policy"))
348 .map_err(Error::DeviceJail)?;
Zach Reizner44863792019-06-26 14:22:08 -0700349 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800350 j.use_seccomp_filter();
351 // Don't do init setup.
352 j.run_as_init();
353 Ok(j)
354}
355
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800356fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700357 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800358 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
359 // A directory for a jailed device's pivot root.
360 let root_path = Path::new(pivot_root);
361 if !root_path.exists() {
362 return Err(Error::PivotRootDoesntExist(pivot_root));
363 }
364 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Zach Reizner44863792019-06-26 14:22:08 -0700365 Ok(Some(create_base_minijail(
366 root_path,
367 cfg.seccomp_log_failures,
368 &policy_path,
369 )?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800370 } else {
371 Ok(None)
372 }
373}
374
David Tolnayfd0971d2019-03-04 17:15:57 -0800375type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800376
377fn create_block_device(
378 cfg: &Config,
379 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700380 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800381) -> DeviceResult {
382 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
383 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
384 // Safe because we will validate |raw_fd|.
385 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
386 } else {
387 OpenOptions::new()
388 .read(true)
389 .write(!disk.read_only)
390 .open(&disk.path)
391 .map_err(Error::Disk)?
392 };
393 // Lock the disk image to prevent other crosvm instances from using it.
394 let lock_op = if disk.read_only {
395 FlockOperation::LockShared
396 } else {
397 FlockOperation::LockExclusive
398 };
399 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
400
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700401 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800402 let dev = virtio::Block::new(
403 disk_file,
404 disk.read_only,
405 disk.sparse,
Daniel Verkamp27672232019-12-06 17:26:55 +1100406 disk.block_size,
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800407 Some(disk_device_socket),
408 )
409 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800410
411 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700412 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800413 jail: simple_jail(&cfg, "block_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800414 })
415}
416
417fn create_rng_device(cfg: &Config) -> DeviceResult {
418 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
419
420 Ok(VirtioDeviceStub {
421 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800422 jail: simple_jail(&cfg, "rng_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800423 })
424}
425
/// Creates the virtio tpm device and its storage directory.
///
/// When a jail is in use, the storage lives in `/run/vm/tpm.<pid>`, which is created, chowned
/// to the ids returned by `add_crosvm_user_to_jail`, and bind-mounted into the jail. Without a
/// jail, `/tmp/tpm-simulator` is used.
///
/// # Errors
///
/// `Error::CreateTpmStorage`, `Error::ChownTpmStorage`, `Error::IoJail` for mount failures,
/// plus any error from `simple_jail` or `add_crosvm_user_to_jail`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;
    use sys_util::chown;

    let mut tpm_jail = simple_jail(cfg, "tpm_device")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Mount a tmpfs on the device's root directory for tpm simulator storage. The
            // size is 20*1024, or 20 KB.
            let mount_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                mount_flags,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;
            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk, which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
474
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800475fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800476 let socket = single_touch_spec.path.into_unix_stream().map_err(|e| {
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800477 error!("failed configuring virtio single touch: {:?}", e);
478 e
479 })?;
480
481 let dev = virtio::new_single_touch(socket, single_touch_spec.width, single_touch_spec.height)
482 .map_err(Error::InputDeviceNew)?;
483 Ok(VirtioDeviceStub {
484 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800485 jail: simple_jail(&cfg, "input_device")?,
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800486 })
487}
488
489fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
Zach Reizner65b98f12019-11-22 17:34:58 -0800490 let socket = trackpad_spec.path.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800491 error!("failed configuring virtio trackpad: {}", e);
492 e
493 })?;
494
495 let dev = virtio::new_trackpad(socket, trackpad_spec.width, trackpad_spec.height)
496 .map_err(Error::InputDeviceNew)?;
497
498 Ok(VirtioDeviceStub {
499 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800500 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800501 })
502}
503
Zach Reizner65b98f12019-11-22 17:34:58 -0800504fn create_mouse_device<T: IntoUnixStream>(cfg: &Config, mouse_socket: T) -> DeviceResult {
505 let socket = mouse_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800506 error!("failed configuring virtio mouse: {}", e);
507 e
508 })?;
509
510 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
511
512 Ok(VirtioDeviceStub {
513 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800514 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800515 })
516}
517
Zach Reizner65b98f12019-11-22 17:34:58 -0800518fn create_keyboard_device<T: IntoUnixStream>(cfg: &Config, keyboard_socket: T) -> DeviceResult {
519 let socket = keyboard_socket.into_unix_stream().map_err(|e| {
David Tolnay2b089fc2019-03-04 15:33:22 -0800520 error!("failed configuring virtio keyboard: {}", e);
521 e
522 })?;
523
524 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
525
526 Ok(VirtioDeviceStub {
527 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800528 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800529 })
530}
531
532fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
533 let dev_file = OpenOptions::new()
534 .read(true)
535 .write(true)
536 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800537 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800538
539 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
540
541 Ok(VirtioDeviceStub {
542 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800543 jail: simple_jail(&cfg, "input_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800544 })
545}
546
Jakub Staron1f828d72019-04-11 12:49:29 -0700547fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800548 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
549
550 Ok(VirtioDeviceStub {
551 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800552 jail: simple_jail(&cfg, "balloon_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800553 })
554}
555
556fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
557 // Safe because we ensure that we get a unique handle to the fd.
558 let tap = unsafe {
559 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
560 .map_err(Error::CreateTapDevice)?
561 };
562
563 let dev = virtio::Net::from(tap).map_err(Error::NetDeviceNew)?;
564
565 Ok(VirtioDeviceStub {
566 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800567 jail: simple_jail(&cfg, "net_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800568 })
569}
570
571fn create_net_device(
572 cfg: &Config,
573 host_ip: Ipv4Addr,
574 netmask: Ipv4Addr,
575 mac_address: MacAddress,
576 mem: &GuestMemory,
577) -> DeviceResult {
578 let dev = if cfg.vhost_net {
579 let dev =
580 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
581 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800582 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800583 } else {
584 let dev =
585 virtio::Net::<Tap>::new(host_ip, netmask, mac_address).map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800586 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800587 };
588
589 let policy = if cfg.vhost_net {
Matt Delco45caf912019-11-13 08:11:09 -0800590 "vhost_net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800591 } else {
Matt Delco45caf912019-11-13 08:11:09 -0800592 "net_device"
David Tolnay2b089fc2019-03-04 15:33:22 -0800593 };
594
595 Ok(VirtioDeviceStub {
596 dev,
597 jail: simple_jail(&cfg, policy)?,
598 })
599}
600
/// Creates the virtio GPU device and, when a jail is in use, prepares the jail's file system.
///
/// Display backends are tried in the order Wayland (only when `wayland_socket_path` is given),
/// X, then the stub backend. When `cfg.sandbox` is set the Wayland backend is pointed at the
/// in-jail path "/wayland-0" instead of the host socket path.
///
/// Panics if `cfg.gpu_parameters` is `None`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &EventFd,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<&PathBuf>,
    x_display: Option<String>,
    event_devices: Vec<EventDevice>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    let mut display_backends = Vec::with_capacity(3);
    if let Some(socket_path) = wayland_socket_path {
        let visible_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            socket_path.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(visible_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Stub);

    let gpu = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEventFd)?,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
        cfg.gpu_parameters.as_ref().unwrap(),
        event_devices,
    );

    let mut jail = simple_jail(cfg, "gpu_device")?;
    if let Some(gpu_jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        gpu_jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            tmpfs_flags,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        for required in &["/sys/dev/char", "/sys/devices", "/dev/dri"] {
            let required = Path::new(required);
            gpu_jail.mount_bind(required, required, false)?;
        }

        // If the ARM specific devices exist on the host, bind mount them in.
        for optional in &["/dev/mali0", "/dev/pvr_sync"] {
            let optional = Path::new(optional);
            if optional.exists() {
                gpu_jail.mount_bind(optional, optional, true)?;
            }
        }

        // Libraries that are required when mesa drivers are dynamically loaded.
        for lib_dir in &["/usr/lib", "/usr/lib64", "/lib", "/lib64"] {
            let lib_dir = Path::new(lib_dir);
            if lib_dir.exists() {
                gpu_jail.mount_bind(lib_dir, lib_dir, false)?;
            }
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(path) = wayland_socket_path {
            gpu_jail.mount_bind(path, jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(gpu_jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        let proc_flags =
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
        gpu_jail.mount(proc_path, proc_path, "proc", proc_flags)?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail,
    })
}
706
707fn create_wayland_device(
708 cfg: &Config,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700709 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800710 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
711) -> DeviceResult {
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900712 let wayland_socket_dirs = cfg
713 .wayland_socket_paths
714 .iter()
715 .map(|(_name, path)| path.parent())
716 .collect::<Option<Vec<_>>>()
717 .ok_or(Error::InvalidWaylandPath)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800718
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900719 let dev = virtio::Wl::new(cfg.wayland_socket_paths.clone(), socket, resource_bridge)
720 .map_err(Error::WaylandDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800721
Matt Delco45caf912019-11-13 08:11:09 -0800722 let jail = match simple_jail(&cfg, "wl_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800723 Some(mut jail) => {
724 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
725 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
726 jail.mount_with_data(
727 Path::new("none"),
728 Path::new("/"),
729 "tmpfs",
730 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
731 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800732 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800733
734 // Bind mount the wayland socket's directory into jail's root. This is necessary since
735 // each new wayland context must open() the socket. If the wayland socket is ever
736 // destroyed and remade in the same host directory, new connections will be possible
737 // without restarting the wayland device.
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900738 for dir in &wayland_socket_dirs {
739 jail.mount_bind(dir, dir, true)?;
740 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800741 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
742
743 Some(jail)
744 }
745 None => None,
746 };
747
748 Ok(VirtioDeviceStub {
749 dev: Box::new(dev),
750 jail,
751 })
752}
753
754fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
755 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
756
757 Ok(VirtioDeviceStub {
758 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -0800759 jail: simple_jail(&cfg, "vhost_vsock_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -0800760 })
761}
762
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900763fn create_fs_device(
764 cfg: &Config,
765 uid_map: &str,
766 gid_map: &str,
767 src: &Path,
768 tag: &str,
769 fs_cfg: virtio::fs::passthrough::Config,
770) -> DeviceResult {
771 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
772
773 if cfg.sandbox {
774 j.namespace_pids();
775 j.namespace_user();
776 j.namespace_user_disable_setgroups();
777 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
778 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
779
780 // Run in an empty network namespace.
781 j.namespace_net();
782
783 j.no_new_privs();
784
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900785 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
786 // the entire device process if a worker thread commits a seccomp violation.
Matt Delco45caf912019-11-13 08:11:09 -0800787 let seccomp_policy = cfg.seccomp_policy_dir.join("fs_device");
Chirantan Ekboteb0ac0072019-11-14 18:45:56 +0900788 j.set_seccomp_filter_tsync();
789 if cfg.seccomp_log_failures {
790 j.log_seccomp_filter_failures();
791 }
792 j.parse_seccomp_filters(&seccomp_policy)
793 .map_err(Error::DeviceJail)?;
794 j.use_seccomp_filter();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900795
796 // Don't do init setup.
797 j.run_as_init();
798 }
799
800 // Create a new mount namespace with the source directory as the root. We need this even when
801 // sandboxing is disabled as the server relies on the host kernel to prevent path traversals
802 // from leaking out of the shared directory.
803 j.namespace_vfs();
804 j.enter_pivot_root(src).map_err(Error::DevicePivotRoot)?;
805
806 // The file server opens a lot of fds and needs a really high open file limit.
807 let max_open_files = get_max_open_files()?;
Allen Webbd4afd702019-12-20 16:07:30 -0800808 j.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files)
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900809 .map_err(Error::SettingMaxOpenFiles)?;
810
811 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
812 // when num_queues > 1.
813 let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
814
815 Ok(VirtioDeviceStub {
816 dev: Box::new(dev),
817 jail: Some(j),
818 })
819}
820
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +0900821fn create_9p_device(cfg: &Config, src: &Path, tag: &str) -> DeviceResult {
Matt Delco45caf912019-11-13 08:11:09 -0800822 let (jail, root) = match simple_jail(&cfg, "9p_device")? {
David Tolnay2b089fc2019-03-04 15:33:22 -0800823 Some(mut jail) => {
824 // The shared directory becomes the root of the device's file system.
825 let root = Path::new("/");
David Tolnayfd0971d2019-03-04 17:15:57 -0800826 jail.mount_bind(src, root, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800827
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +0900828 add_crosvm_user_to_jail(&mut jail, "p9")?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800829 (Some(jail), root)
830 }
831 None => {
832 // There's no bind mount so we tell the server to treat the source directory as the
David Tolnay9deb7d72019-03-05 18:25:44 -0800833 // root.
David Tolnay2b089fc2019-03-04 15:33:22 -0800834 (None, src)
835 }
836 };
837
838 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
839
840 Ok(VirtioDeviceStub {
841 dev: Box::new(dev),
842 jail,
843 })
844}
845
Jakub Starona3411ea2019-04-24 10:55:25 -0700846fn create_pmem_device(
847 cfg: &Config,
848 vm: &mut Vm,
849 resources: &mut SystemAllocator,
850 disk: &DiskOption,
851 index: usize,
852) -> DeviceResult {
853 let fd = OpenOptions::new()
854 .read(true)
855 .write(!disk.read_only)
856 .open(&disk.path)
857 .map_err(Error::Disk)?;
858
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800859 let (disk_size, arena_size) = {
Jakub Starona3411ea2019-04-24 10:55:25 -0700860 let metadata = std::fs::metadata(&disk.path).map_err(Error::Disk)?;
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800861 let disk_len = metadata.len();
862 // Linux requires pmem region sizes to be 2 MiB aligned. Linux will fill any partial page
863 // at the end of an mmap'd file and won't write back beyond the actual file length, but if
864 // we just align the size of the file to 2 MiB then access beyond the last page of the
865 // mapped file will generate SIGBUS. So use a memory mapping arena that will provide
866 // padding up to 2 MiB.
867 let alignment = 2 * 1024 * 1024;
868 let align_adjust = if disk_len % alignment != 0 {
869 alignment - (disk_len % alignment)
870 } else {
871 0
872 };
873 (
874 disk_len,
875 disk_len
876 .checked_add(align_adjust)
877 .ok_or(Error::PmemDeviceImageTooBig)?,
878 )
Jakub Starona3411ea2019-04-24 10:55:25 -0700879 };
880
881 let protection = {
882 if disk.read_only {
883 Protection::read()
884 } else {
885 Protection::read_write()
886 }
887 };
888
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800889 let arena = {
Jakub Starona3411ea2019-04-24 10:55:25 -0700890 // Conversion from u64 to usize may fail on 32bit system.
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800891 let arena_size = usize::try_from(arena_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
892 let disk_size = usize::try_from(disk_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
Jakub Starona3411ea2019-04-24 10:55:25 -0700893
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800894 let mut arena = MemoryMappingArena::new(arena_size).map_err(Error::ReservePmemMemory)?;
895 arena
896 .add_fd_offset_protection(0, disk_size, &fd, 0, protection)
897 .map_err(Error::ReservePmemMemory)?;
898 arena
Jakub Starona3411ea2019-04-24 10:55:25 -0700899 };
900
901 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +0800902 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -0700903 .allocate_with_align(
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800904 arena_size,
Jakub Starona3411ea2019-04-24 10:55:25 -0700905 Alloc::PmemDevice(index),
906 format!("pmem_disk_image_{}", index),
907 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
908 128 * 1024 * 1024, /* 128 MiB */
909 )
910 .map_err(Error::AllocatePmemDeviceAddress)?;
911
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800912 vm.add_mmap_arena(
Jakub Starona3411ea2019-04-24 10:55:25 -0700913 GuestAddress(mapping_address),
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800914 arena,
Jakub Starona3411ea2019-04-24 10:55:25 -0700915 /* read_only = */ disk.read_only,
916 /* log_dirty_pages = */ false,
917 )
918 .map_err(Error::AddPmemDeviceMemory)?;
919
Stephen Barberdc7c07b2019-12-20 12:43:35 -0800920 let dev = virtio::Pmem::new(fd, GuestAddress(mapping_address), arena_size)
Jakub Starona3411ea2019-04-24 10:55:25 -0700921 .map_err(Error::PmemDeviceNew)?;
922
923 Ok(VirtioDeviceStub {
924 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Matt Delco45caf912019-11-13 08:11:09 -0800925 jail: simple_jail(&cfg, "pmem_device")?,
Jakub Starona3411ea2019-04-24 10:55:25 -0700926 })
927}
928
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -0700929// gpu_device_socket is not used when GPU support is disabled.
930#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -0800931fn create_virtio_devices(
932 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -0700933 mem: &GuestMemory,
Jakub Starona3411ea2019-04-24 10:55:25 -0700934 vm: &mut Vm,
935 resources: &mut SystemAllocator,
Zach Reizner55a9e502018-10-03 10:22:32 -0700936 _exit_evt: &EventFd,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700937 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -0700938 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -0700939 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -0700940 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800941) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700942 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800943
Zach Reizner8fb52112017-12-13 16:04:39 -0800944 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800945 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -0800946 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800947 }
948
Jakub Starona3411ea2019-04-24 10:55:25 -0700949 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
950 devs.push(create_pmem_device(cfg, vm, resources, pmem_disk, index)?);
951 }
952
David Tolnay2b089fc2019-03-04 15:33:22 -0800953 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800954
David Tolnayde6b29a2018-12-20 11:49:46 -0800955 #[cfg(feature = "tpm")]
956 {
David Tolnay43f8e212019-02-13 17:28:16 -0800957 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -0800958 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -0800959 }
David Tolnayde6b29a2018-12-20 11:49:46 -0800960 }
961
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800962 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
963 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
964 }
965
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800966 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -0800967 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800968 }
969
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800970 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -0800971 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800972 }
973
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800974 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -0800975 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800976 }
977
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800978 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -0800979 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800980 }
981
David Tolnay2b089fc2019-03-04 15:33:22 -0800982 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -0800983
Zach Reizner39aa26b2017-12-12 18:03:23 -0800984 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800985 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -0800986 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -0800987 }
988
David Tolnay2b089fc2019-03-04 15:33:22 -0800989 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
990 (cfg.host_ip, cfg.netmask, cfg.mac_address)
991 {
992 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800993 }
994
David Tolnayfa701712019-02-13 16:42:54 -0800995 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900996 let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();
997
Ryo Hashimoto0b788de2019-12-10 17:14:13 +0900998 if !cfg.wayland_socket_paths.is_empty() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900999 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
1000 let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;
1001
1002 #[cfg(feature = "gpu")]
1003 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001004 if cfg.gpu_parameters.is_some() {
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001005 let (wl_socket, gpu_socket) =
1006 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
1007 resource_bridges.push(gpu_socket);
1008 wl_resource_bridge = Some(wl_socket);
1009 }
1010 }
1011
1012 devs.push(create_wayland_device(
1013 cfg,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +09001014 wayland_device_socket,
1015 wl_resource_bridge,
1016 )?);
1017 }
David Tolnayfa701712019-02-13 16:42:54 -08001018
Zach Reizner3a8100a2017-09-13 19:15:43 -07001019 #[cfg(feature = "gpu")]
1020 {
Jason Macnakcc7070b2019-11-06 14:48:12 -08001021 if cfg.gpu_parameters.is_some() {
Zach Reizner65b98f12019-11-22 17:34:58 -08001022 let mut event_devices = Vec::new();
1023 if cfg.display_window_mouse {
1024 let (event_device_socket, virtio_dev_socket) =
1025 UnixStream::pair().map_err(Error::CreateSocket)?;
1026 // TODO(nkgold): the width/height here should match the display's height/width. When
1027 // those settings are available as CLI options, we should use the CLI options here
1028 // as well.
1029 let dev = virtio::new_single_touch(virtio_dev_socket, 1280, 1024)
1030 .map_err(Error::InputDeviceNew)?;
1031 devs.push(VirtioDeviceStub {
1032 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001033 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001034 });
1035 event_devices.push(EventDevice::touchscreen(event_device_socket));
1036 }
1037 if cfg.display_window_keyboard {
1038 let (event_device_socket, virtio_dev_socket) =
1039 UnixStream::pair().map_err(Error::CreateSocket)?;
1040 let dev = virtio::new_keyboard(virtio_dev_socket).map_err(Error::InputDeviceNew)?;
1041 devs.push(VirtioDeviceStub {
1042 dev: Box::new(dev),
Matt Delco45caf912019-11-13 08:11:09 -08001043 jail: simple_jail(&cfg, "input_device")?,
Zach Reizner65b98f12019-11-22 17:34:58 -08001044 });
1045 event_devices.push(EventDevice::keyboard(event_device_socket));
1046 }
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001047 devs.push(create_gpu_device(
1048 cfg,
1049 _exit_evt,
1050 gpu_device_socket,
1051 resource_bridges,
Ryo Hashimoto0b788de2019-12-10 17:14:13 +09001052 // Use the unnamed socket for GPU display screens.
1053 cfg.wayland_socket_paths.get(""),
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001054 cfg.x_display.clone(),
Zach Reizner65b98f12019-11-22 17:34:58 -08001055 event_devices,
Zach Reizner0f2cfb02019-06-19 17:46:03 -07001056 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -07001057 }
1058 }
1059
Zach Reizneraa575662018-08-15 10:46:32 -07001060 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001061 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001062 }
1063
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001064 for shared_dir in &cfg.shared_dirs {
1065 let SharedDir {
1066 src,
1067 tag,
1068 kind,
1069 uid_map,
1070 gid_map,
1071 cfg: fs_cfg,
1072 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001073
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001074 let dev = match kind {
1075 SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001076 SharedDirKind::P9 => create_9p_device(cfg, src, tag)?,
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001077 };
1078 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001079 }
1080
1081 Ok(devs)
1082}
1083
1084fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001085 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001086 mem: &GuestMemory,
Jakub Starona3411ea2019-04-24 10:55:25 -07001087 vm: &mut Vm,
1088 resources: &mut SystemAllocator,
David Tolnay2b089fc2019-03-04 15:33:22 -08001089 exit_evt: &EventFd,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001090 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001091 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001092 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001093 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001094 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001095 usb_provider: HostBackendDeviceProvider,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001096) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001097 let stubs = create_virtio_devices(
1098 &cfg,
1099 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001100 vm,
1101 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001102 exit_evt,
1103 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001104 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001105 balloon_device_socket,
1106 disk_device_sockets,
1107 )?;
1108
1109 let mut pci_devices = Vec::new();
1110
1111 for stub in stubs {
Daniel Verkampbb712d62019-11-19 09:47:33 -08001112 let (msi_host_socket, msi_device_socket) =
1113 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1114 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1115 let dev = VirtioPciDevice::new(mem.clone(), stub.dev, msi_device_socket)
1116 .map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -08001117 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001118 pci_devices.push((dev, stub.jail));
1119 }
1120
1121 if cfg.cras_audio {
paulhsia580d4182019-05-24 16:53:55 +08001122 let mut server = Box::new(CrasClient::new().map_err(Error::CreateCrasClient)?);
1123 if cfg.cras_capture {
1124 server.enable_cras_capture();
1125 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001126 let cras_audio = devices::Ac97Dev::new(mem.clone(), server);
1127
1128 pci_devices.push((
1129 Box::new(cras_audio),
Matt Delco45caf912019-11-13 08:11:09 -08001130 simple_jail(&cfg, "cras_audio_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001131 ));
1132 }
1133
1134 if cfg.null_audio {
1135 let server = Box::new(DummyStreamSource::new());
1136 let null_audio = devices::Ac97Dev::new(mem.clone(), server);
1137
1138 pci_devices.push((
1139 Box::new(null_audio),
Matt Delco45caf912019-11-13 08:11:09 -08001140 simple_jail(&cfg, "null_audio_device")?,
David Tolnay2b089fc2019-03-04 15:33:22 -08001141 ));
1142 }
Jingkui Wang100e6e42019-03-08 20:41:57 -08001143 // Create xhci controller.
1144 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
Matt Delco45caf912019-11-13 08:11:09 -08001145 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001146
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001147 if cfg.vfio.is_some() {
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001148 let (vfio_host_socket_irq, vfio_device_socket_irq) =
1149 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1150 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_irq));
1151
Xiong Zhang85abeff2019-04-23 17:15:24 +08001152 let (vfio_host_socket_mem, vfio_device_socket_mem) =
1153 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1154 control_sockets.push(TaggedControlSocket::VmMemory(vfio_host_socket_mem));
1155
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001156 let vfio_path = cfg.vfio.as_ref().unwrap().as_path();
Xiong Zhangc554fff2019-04-23 17:14:55 +08001157 let vfiodevice =
Daniel Verkamp04a82c72019-09-24 11:06:58 -07001158 VfioDevice::new(vfio_path, vm, mem.clone()).map_err(Error::CreateVfioDevice)?;
Xiong Zhang85abeff2019-04-23 17:15:24 +08001159 let vfiopcidevice = Box::new(VfioPciDevice::new(
1160 vfiodevice,
1161 vfio_device_socket_irq,
1162 vfio_device_socket_mem,
1163 ));
Matt Delco45caf912019-11-13 08:11:09 -08001164 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device")?));
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001165 }
1166
David Tolnay2b089fc2019-03-04 15:33:22 -08001167 Ok(pci_devices)
1168}
1169
1170#[derive(Copy, Clone)]
Chirantan Ekbote1a2683b2019-11-26 16:28:23 +09001171#[cfg_attr(not(feature = "tpm"), allow(dead_code))]
David Tolnay2b089fc2019-03-04 15:33:22 -08001172struct Ids {
1173 uid: uid_t,
1174 gid: gid_t,
1175}
1176
David Tolnay48c48292019-03-01 16:54:25 -08001177// Set the uid/gid for the jailed process and give a basic id map. This is
1178// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001179fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001180 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1181
1182 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1183 Ok(u) => u,
1184 Err(e) => {
1185 warn!("falling back to current user id for {}: {}", feature, e);
1186 geteuid()
1187 }
1188 };
1189
1190 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1191 Ok(u) => u,
1192 Err(e) => {
1193 warn!("falling back to current group id for {}: {}", feature, e);
1194 getegid()
1195 }
1196 };
1197
1198 jail.change_uid(crosvm_uid);
1199 jail.change_gid(crosvm_gid);
1200 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1201 .map_err(Error::SettingUidMap)?;
1202 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1203 .map_err(Error::SettingGidMap)?;
1204
David Tolnay41a6f842019-03-01 16:18:44 -08001205 Ok(Ids {
1206 uid: crosvm_uid,
1207 gid: crosvm_gid,
1208 })
David Tolnay48c48292019-03-01 16:54:25 -08001209}
1210
David Tolnayfd0971d2019-03-04 17:15:57 -08001211fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001212 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001213 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001214 }
1215 let raw_fd = path
1216 .file_name()
1217 .and_then(|fd_osstr| fd_osstr.to_str())
1218 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1219 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001220 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001221}
1222
Zach Reizner65b98f12019-11-22 17:34:58 -08001223trait IntoUnixStream {
1224 fn into_unix_stream(self) -> Result<UnixStream>;
1225}
1226
1227impl<'a> IntoUnixStream for &'a Path {
1228 fn into_unix_stream(self) -> Result<UnixStream> {
1229 if self.parent() == Some(Path::new("/proc/self/fd")) {
1230 // Safe because we will validate |raw_fd|.
1231 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(self)?)) }
1232 } else {
1233 UnixStream::connect(self).map_err(Error::InputEventsOpen)
1234 }
1235 }
1236}
1237impl<'a> IntoUnixStream for &'a PathBuf {
1238 fn into_unix_stream(self) -> Result<UnixStream> {
1239 self.as_path().into_unix_stream()
1240 }
1241}
1242
1243impl IntoUnixStream for UnixStream {
1244 fn into_unix_stream(self) -> Result<UnixStream> {
1245 Ok(self)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001246 }
1247}
1248
Matt Delco84cf9c02019-10-07 22:38:13 -07001249fn setup_vcpu_signal_handler(use_kvm_signals: bool) -> Result<()> {
1250 if use_kvm_signals {
1251 unsafe {
1252 extern "C" fn handle_signal() {}
1253 // Our signal handler does nothing and is trivially async signal safe.
1254 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1255 .map_err(Error::RegisterSignalHandler)?;
1256 }
1257 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1258 } else {
1259 unsafe {
1260 extern "C" fn handle_signal() {
1261 Vcpu::set_local_immediate_exit(true);
1262 }
1263 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1264 .map_err(Error::RegisterSignalHandler)?;
1265 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001266 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001267 Ok(())
1268}
1269
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001270#[derive(Default)]
1271struct VcpuRunMode {
1272 mtx: Mutex<VmRunMode>,
1273 cvar: Condvar,
1274}
1275
1276impl VcpuRunMode {
1277 fn set_and_notify(&self, new_mode: VmRunMode) {
1278 *self.mtx.lock() = new_mode;
1279 self.cvar.notify_all();
1280 }
1281}
1282
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001283// Converts a vcpu into a runnable vcpu if possible. On failure, returns `None`.
1284fn runnable_vcpu(vcpu: Vcpu, use_kvm_signals: bool, cpu_id: u32) -> Option<RunnableVcpu> {
1285 if use_kvm_signals {
1286 match get_blocked_signals() {
1287 Ok(mut v) => {
1288 v.retain(|&x| x != SIGRTMIN() + 0);
1289 if let Err(e) = vcpu.set_signal_mask(&v) {
1290 error!(
1291 "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
1292 cpu_id, e
1293 );
1294 return None;
1295 }
1296 }
1297 Err(e) => {
1298 error!("Failed to retrieve signal mask for vcpu {} : {}", cpu_id, e);
1299 return None;
1300 }
1301 };
1302 }
1303
1304 match vcpu.to_runnable(Some(SIGRTMIN() + 0)) {
1305 Ok(v) => Some(v),
1306 Err(e) => {
1307 error!("Failed to set thread id for vcpu {} : {}", cpu_id, e);
1308 None
1309 }
1310 }
1311}
1312
Zach Reizner55a9e502018-10-03 10:22:32 -07001313fn run_vcpu(
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001314 vcpu: Vcpu,
Zach Reizner55a9e502018-10-03 10:22:32 -07001315 cpu_id: u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001316 vcpu_affinity: Vec<usize>,
Zach Reizner55a9e502018-10-03 10:22:32 -07001317 start_barrier: Arc<Barrier>,
1318 io_bus: devices::Bus,
1319 mmio_bus: devices::Bus,
1320 exit_evt: EventFd,
Zach Reizner795355a2019-01-16 17:37:57 -08001321 requires_kvmclock_ctrl: bool,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001322 run_mode_arc: Arc<VcpuRunMode>,
Matt Delco84cf9c02019-10-07 22:38:13 -07001323 use_kvm_signals: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001324) -> Result<JoinHandle<()>> {
Zach Reizner8fb52112017-12-13 16:04:39 -08001325 thread::Builder::new()
1326 .name(format!("crosvm_vcpu{}", cpu_id))
1327 .spawn(move || {
Zach Reizner95885312020-01-29 18:06:01 -08001328 // The VCPU thread must trigger the `exit_evt` in all paths, and a `ScopedEvent`'s Drop
1329 // implementation accomplishes that.
1330 let _scoped_exit_evt = ScopedEvent::from(exit_evt);
1331
Daniel Verkamp107edb32019-04-05 09:58:48 -07001332 if vcpu_affinity.len() != 0 {
1333 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1334 error!("Failed to set CPU affinity: {}", e);
1335 }
1336 }
1337
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001338 let vcpu = runnable_vcpu(vcpu, use_kvm_signals, cpu_id);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001339
Zach Reizner8fb52112017-12-13 16:04:39 -08001340 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001341
Dylan Reidbb30b2f2019-10-22 18:30:36 +03001342 if let Some(vcpu) = vcpu {
Zach Reizner95885312020-01-29 18:06:01 -08001343 loop {
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001344 let mut interrupted_by_signal = false;
David Tolnay8f3a2322018-11-30 17:11:35 -08001345 match vcpu.run() {
1346 Ok(VcpuExit::IoIn { port, mut size }) => {
1347 let mut data = [0; 8];
1348 if size > data.len() {
1349 error!("unsupported IoIn size of {} bytes", size);
1350 size = data.len();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001351 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001352 io_bus.read(port as u64, &mut data[..size]);
1353 if let Err(e) = vcpu.set_data(&data[..size]) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001354 error!("failed to set return data for IoIn: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001355 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001356 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001357 Ok(VcpuExit::IoOut {
1358 port,
1359 mut size,
1360 data,
1361 }) => {
1362 if size > data.len() {
1363 error!("unsupported IoOut size of {} bytes", size);
1364 size = data.len();
1365 }
1366 io_bus.write(port as u64, &data[..size]);
1367 }
1368 Ok(VcpuExit::MmioRead { address, size }) => {
1369 let mut data = [0; 8];
1370 mmio_bus.read(address, &mut data[..size]);
1371 // Setting data for mmio can not fail.
1372 let _ = vcpu.set_data(&data[..size]);
1373 }
1374 Ok(VcpuExit::MmioWrite {
1375 address,
1376 size,
1377 data,
1378 }) => {
1379 mmio_bus.write(address, &data[..size]);
1380 }
1381 Ok(VcpuExit::Hlt) => break,
1382 Ok(VcpuExit::Shutdown) => break,
Stephen Barberd0e2a252019-12-19 14:26:25 -08001383 Ok(VcpuExit::FailEntry {
1384 hardware_entry_failure_reason,
1385 }) => {
1386 error!("vcpu hw run failure: {:#x}", hardware_entry_failure_reason);
1387 break;
1388 },
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001389 Ok(VcpuExit::SystemEvent(_, _)) => break,
David Tolnay8f3a2322018-11-30 17:11:35 -08001390 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1391 Err(e) => match e.errno() {
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001392 libc::EINTR => interrupted_by_signal = true,
1393 libc::EAGAIN => {}
David Tolnay8f3a2322018-11-30 17:11:35 -08001394 _ => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001395 error!("vcpu hit unknown error: {}", e);
David Tolnay8f3a2322018-11-30 17:11:35 -08001396 break;
1397 }
1398 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001399 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001400
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001401 if interrupted_by_signal {
Matt Delco84cf9c02019-10-07 22:38:13 -07001402 if use_kvm_signals {
1403 // Try to clear the signal that we use to kick VCPU if it is pending before
1404 // attempting to handle pause requests.
1405 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
1406 error!("failed to clear pending signal: {}", e);
1407 break;
1408 }
1409 } else {
1410 vcpu.set_immediate_exit(false);
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001411 }
1412 let mut run_mode_lock = run_mode_arc.mtx.lock();
1413 loop {
1414 match *run_mode_lock {
1415 VmRunMode::Running => break,
Zach Reizner795355a2019-01-16 17:37:57 -08001416 VmRunMode::Suspending => {
1417 // On KVM implementations that use a paravirtualized clock (e.g.
1418 // x86), a flag must be set to indicate to the guest kernel that
1419 // a VCPU was suspended. The guest kernel will use this flag to
1420 // prevent the soft lockup detection from triggering when this
1421 // VCPU resumes, which could happen days later in realtime.
1422 if requires_kvmclock_ctrl {
1423 if let Err(e) = vcpu.kvmclock_ctrl() {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001424 error!("failed to signal to kvm that vcpu {} is being suspended: {}", cpu_id, e);
Zach Reizner795355a2019-01-16 17:37:57 -08001425 }
1426 }
1427 }
Zach Reizner95885312020-01-29 18:06:01 -08001428 VmRunMode::Exiting => return,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001429 }
1430 // Give ownership of our exclusive lock to the condition variable that
1431 // will block. When the condition variable is notified, `wait` will
1432 // unblock and return a new exclusive lock.
1433 run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
1434 }
1435 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001436 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001437 }
David Tolnay2bac1e72018-12-12 14:33:42 -08001438 })
1439 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001440}
1441
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of u64s.
//
// The whole file is read, so files of any length are parsed completely rather than being cut off
// after a fixed-size buffer (which could drop fields or split a number in the middle and yield a
// wrong value). An empty or whitespace-only file yields an empty Vec.
//
// Returns an error if the file cannot be opened or read, if its contents are not valid UTF-8
// (`io::ErrorKind::InvalidData`), or if any of the fields fail to parse as a u64
// (`io::ErrorKind::InvalidData`).
fn file_fields_to_u64<P: AsRef<Path>>(path: P) -> io::Result<Vec<u64>> {
    let mut file = File::open(path)?;

    let mut content = String::new();
    file.read_to_string(&mut content)?;

    // `split_whitespace` already skips leading and trailing whitespace, so no `trim` is needed.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<u64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1461
1462// Reads the contents of a file and converts them into a u64, and if there
1463// are multiple fields it only returns the first one.
1464fn file_to_u64<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1465 file_fields_to_u64(path)?
1466 .into_iter()
1467 .next()
1468 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001469}
1470
Dylan Reid059a1882018-07-23 17:58:09 -07001471pub fn run_config(cfg: Config) -> Result<()> {
Lepton Wu9105e9f2019-03-14 11:38:31 -07001472 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001473 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1474 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1475 // access to those files will not be possible.
1476 info!("crosvm entering multiprocess mode");
1477 }
1478
Jingkui Wang100e6e42019-03-08 20:41:57 -08001479 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001480 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001481 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1482 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1483 // quickly.
1484 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1485
David Tolnay2b089fc2019-03-04 15:33:22 -08001486 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1487 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001488 } else {
1489 None
1490 };
1491
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001492 let vm_image = match cfg.executable_path {
1493 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1494 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
1495 ),
1496 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1497 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
1498 ),
1499 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1500 };
1501
Dylan Reid059a1882018-07-23 17:58:09 -07001502 let components = VmComponents {
Daniel Verkamp6a847062019-11-26 13:16:35 -08001503 memory_size: cfg
1504 .memory
1505 .unwrap_or(256)
1506 .checked_mul(1024 * 1024)
1507 .ok_or(Error::MemoryTooLarge)?,
Dylan Reid059a1882018-07-23 17:58:09 -07001508 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001509 vcpu_affinity: cfg.vcpu_affinity.clone(),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001510 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001511 android_fstab: cfg
1512 .android_fstab
1513 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001514 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001515 .map_or(Ok(None), |v| v.map(Some))?,
Kansho Nishida282115b2019-12-18 13:13:14 +09001516 pstore: cfg.pstore.clone(),
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001517 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001518 extra_kernel_params: cfg.params.clone(),
1519 wayland_dmabuf: cfg.wayland_dmabuf,
Dylan Reid059a1882018-07-23 17:58:09 -07001520 };
1521
Zach Reiznera60744b2019-02-13 17:33:32 -08001522 let control_server_socket = match &cfg.socket_path {
1523 Some(path) => Some(UnlinkUnixSeqpacketListener(
1524 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1525 )),
1526 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001527 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001528
1529 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001530 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07001531 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1532 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001533 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001534 let (balloon_host_socket, balloon_device_socket) =
Jakub Staron1f828d72019-04-11 12:49:29 -07001535 msg_socket::pair::<BalloonControlCommand, ()>().map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001536
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001537 // Create one control socket per disk.
1538 let mut disk_device_sockets = Vec::new();
1539 let mut disk_host_sockets = Vec::new();
1540 let disk_count = cfg.disks.len();
1541 for _ in 0..disk_count {
1542 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07001543 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
1544 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001545 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07001546 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001547 }
1548
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001549 let (gpu_host_socket, gpu_device_socket) =
1550 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1551 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
1552
Lepton Wu20333e42019-03-14 10:48:03 -07001553 let sandbox = cfg.sandbox;
Trent Begin17ccaad2019-04-17 13:51:25 -06001554 let linux = Arch::build_vm(
1555 components,
1556 cfg.split_irqchip,
1557 &cfg.serial_parameters,
Matt Delco45caf912019-11-13 08:11:09 -08001558 simple_jail(&cfg, "serial")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001559 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06001560 create_devices(
1561 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001562 mem,
1563 vm,
1564 sys_allocator,
1565 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001566 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06001567 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001568 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06001569 balloon_device_socket,
1570 &mut disk_device_sockets,
1571 usb_provider,
1572 )
1573 },
1574 )
David Tolnaybe034262019-03-04 17:48:36 -08001575 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08001576
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001577 run_control(
1578 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08001579 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001580 control_sockets,
1581 balloon_host_socket,
1582 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001583 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001584 sigchld_fd,
Lepton Wu20333e42019-03-14 10:48:03 -07001585 sandbox,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001586 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001587}
1588
Zach Reizner55a9e502018-10-03 10:22:32 -07001589fn run_control(
1590 mut linux: RunnableLinuxVm,
Zach Reiznera60744b2019-02-13 17:33:32 -08001591 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07001592 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07001593 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001594 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08001595 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07001596 sigchld_fd: SignalFd,
Lepton Wu20333e42019-03-14 10:48:03 -07001597 sandbox: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001598) -> Result<()> {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001599 // Paths to get the currently available memory and the low memory threshold.
David Tolnay5bbbf612018-12-01 17:49:30 -08001600 const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
1601 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001602
1603 // The amount of additional memory to claim back from the VM whenever the system is
1604 // low on memory.
1605 const ONE_GB: u64 = (1 << 30);
1606
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001607 let max_balloon_memory = match linux.vm.get_memory().memory_size() {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001608 // If the VM has at least 1.5 GB, the balloon driver can consume all but the last 1 GB.
1609 n if n >= (ONE_GB / 2) * 3 => n - ONE_GB,
1610 // Otherwise, if the VM has at least 500MB the balloon driver will consume at most
1611 // half of it.
1612 n if n >= (ONE_GB / 2) => n / 2,
1613 // Otherwise, the VM is too small for us to take memory away from it.
1614 _ => 0,
1615 };
1616 let mut current_balloon_memory: u64 = 0;
1617 let balloon_memory_increment: u64 = max_balloon_memory / 16;
1618
Zach Reizner5bed0d22018-03-28 02:31:11 -07001619 #[derive(PollToken)]
1620 enum Token {
1621 Exit,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001622 ChildSignal,
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001623 CheckAvailableMemory,
1624 LowMemory,
1625 LowmemTimer,
Zach Reiznera60744b2019-02-13 17:33:32 -08001626 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001627 VmControl { index: usize },
1628 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001629
Zach Reizner19ad1f32019-12-12 18:58:50 -08001630 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001631 .set_raw_mode()
1632 .expect("failed to set terminal raw mode");
1633
Zach Reiznerb2110be2019-07-23 15:55:03 -07001634 let poll_ctx = PollContext::build_with(&[
1635 (&linux.exit_evt, Token::Exit),
1636 (&sigchld_fd, Token::ChildSignal),
1637 ])
1638 .map_err(Error::PollContextAdd)?;
1639
Zach Reiznera60744b2019-02-13 17:33:32 -08001640 if let Some(socket_server) = &control_server_socket {
1641 poll_ctx
1642 .add(socket_server, Token::VmControlServer)
1643 .map_err(Error::PollContextAdd)?;
1644 }
Dylan Reid059a1882018-07-23 17:58:09 -07001645 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001646 poll_ctx
1647 .add(socket.as_ref(), Token::VmControl { index })
1648 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001649 }
1650
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001651 // Watch for low memory notifications and take memory back from the VM.
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001652 let low_mem = File::open("/dev/chromeos-low-mem").ok();
David Tolnay64cd5ea2019-04-15 15:56:35 -07001653 if let Some(low_mem) = &low_mem {
Zach Reizner55a9e502018-10-03 10:22:32 -07001654 poll_ctx
1655 .add(low_mem, Token::LowMemory)
1656 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001657 } else {
1658 warn!("Unable to open low mem indicator, maybe not a chrome os kernel");
1659 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001660
1661 // Used to rate limit balloon requests.
1662 let mut lowmem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001663 poll_ctx
1664 .add(&lowmem_timer, Token::LowmemTimer)
1665 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001666
1667 // Used to check whether it's ok to start giving memory back to the VM.
1668 let mut freemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001669 poll_ctx
1670 .add(&freemem_timer, Token::CheckAvailableMemory)
1671 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001672
1673 // Used to add jitter to timer values so that we don't have a thundering herd problem when
1674 // multiple VMs are running.
Daniel Prilik22006042019-01-14 14:19:04 -08001675 let mut simple_rng = SimpleRng::new(
1676 SystemTime::now()
1677 .duration_since(UNIX_EPOCH)
1678 .expect("time went backwards")
1679 .subsec_nanos() as u64,
1680 );
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001681
Lepton Wu20333e42019-03-14 10:48:03 -07001682 if sandbox {
1683 // Before starting VCPUs, in case we started with some capabilities, drop them all.
1684 drop_capabilities().map_err(Error::DropCapabilities)?;
1685 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001686
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001687 let mut vcpu_handles = Vec::with_capacity(linux.vcpus.len());
1688 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpus.len() + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001689 let run_mode_arc = Arc::new(VcpuRunMode::default());
Matt Delco84cf9c02019-10-07 22:38:13 -07001690 let use_kvm_signals = !linux.kvm.check_extension(Cap::ImmediateExit);
1691 setup_vcpu_signal_handler(use_kvm_signals)?;
Daniel Verkamp94c35272019-09-12 13:31:30 -07001692 let vcpus = linux.vcpus.split_off(0);
1693 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001694 let handle = run_vcpu(
1695 vcpu,
1696 cpu_id as u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001697 linux.vcpu_affinity.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001698 vcpu_thread_barrier.clone(),
1699 linux.io_bus.clone(),
1700 linux.mmio_bus.clone(),
1701 linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Zach Reizner795355a2019-01-16 17:37:57 -08001702 linux.vm.check_extension(Cap::KvmclockCtrl),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001703 run_mode_arc.clone(),
Matt Delco84cf9c02019-10-07 22:38:13 -07001704 use_kvm_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07001705 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07001706 vcpu_handles.push(handle);
1707 }
1708 vcpu_thread_barrier.wait();
1709
Zach Reizner39aa26b2017-12-12 18:03:23 -08001710 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001711 let events = {
1712 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001713 Ok(v) => v,
1714 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001715 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001716 break;
1717 }
1718 }
1719 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001720
1721 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07001722 for event in events.iter_readable() {
1723 match event.token() {
1724 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001725 info!("vcpu requested shutdown");
1726 break 'poll;
1727 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001728 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001729 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08001730 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08001731 let pid = siginfo.ssi_pid;
1732 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1733 Some(label) => format!("{} (pid {})", label, pid),
1734 None => format!("pid {}", pid),
1735 };
David Tolnayf5032762018-12-03 10:46:45 -08001736 error!(
1737 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001738 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001739 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001740 }
David Tolnayf5032762018-12-03 10:46:45 -08001741 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001742 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001743 Token::CheckAvailableMemory => {
1744 // Acknowledge the timer.
1745 freemem_timer.wait().map_err(Error::TimerFd)?;
1746 if current_balloon_memory == 0 {
1747 // Nothing to see here.
1748 if let Err(e) = freemem_timer.clear() {
1749 warn!("unable to clear available memory check timer: {}", e);
1750 }
1751 continue;
1752 }
1753
1754 // Otherwise see if we can free up some memory.
1755 let margin = file_to_u64(LOWMEM_MARGIN).map_err(Error::ReadLowmemMargin)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001756 let available =
1757 file_to_u64(LOWMEM_AVAILABLE).map_err(Error::ReadLowmemAvailable)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001758
1759 // `available` and `margin` are specified in MB while `balloon_memory_increment` is in
1760 // bytes. So to correctly compare them we need to turn the increment value into MB.
Zach Reizner55a9e502018-10-03 10:22:32 -07001761 if available >= margin + 2 * (balloon_memory_increment >> 20) {
1762 current_balloon_memory =
1763 if current_balloon_memory >= balloon_memory_increment {
1764 current_balloon_memory - balloon_memory_increment
1765 } else {
1766 0
1767 };
Jakub Staron1f828d72019-04-11 12:49:29 -07001768 let command = BalloonControlCommand::Adjust {
1769 num_bytes: current_balloon_memory,
1770 };
1771 if let Err(e) = balloon_host_socket.send(&command) {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001772 warn!("failed to send memory value to balloon device: {}", e);
1773 }
1774 }
1775 }
1776 Token::LowMemory => {
David Tolnay64cd5ea2019-04-15 15:56:35 -07001777 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001778 let old_balloon_memory = current_balloon_memory;
Zach Reizner55a9e502018-10-03 10:22:32 -07001779 current_balloon_memory = min(
1780 current_balloon_memory + balloon_memory_increment,
1781 max_balloon_memory,
1782 );
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001783 if current_balloon_memory != old_balloon_memory {
Jakub Staron1f828d72019-04-11 12:49:29 -07001784 let command = BalloonControlCommand::Adjust {
1785 num_bytes: current_balloon_memory,
1786 };
1787 if let Err(e) = balloon_host_socket.send(&command) {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001788 warn!("failed to send memory value to balloon device: {}", e);
1789 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001790 }
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001791
1792 // Stop polling the lowmem device until the timer fires.
1793 poll_ctx.delete(low_mem).map_err(Error::PollContextDelete)?;
1794
1795 // Add some jitter to the timer so that if there are multiple VMs running
1796 // they don't all start ballooning at exactly the same time.
Daniel Prilik22006042019-01-14 14:19:04 -08001797 let lowmem_dur = Duration::from_millis(1000 + simple_rng.rng() % 200);
Zach Reizner55a9e502018-10-03 10:22:32 -07001798 lowmem_timer
1799 .reset(lowmem_dur, None)
1800 .map_err(Error::ResetTimerFd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001801
1802 // Also start a timer to check when we can start giving memory back. Do the
1803 // first check after a minute (with jitter) and subsequent checks after
1804 // every 30 seconds (with jitter).
Daniel Prilik22006042019-01-14 14:19:04 -08001805 let freemem_dur = Duration::from_secs(60 + simple_rng.rng() % 12);
1806 let freemem_int = Duration::from_secs(30 + simple_rng.rng() % 6);
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001807 freemem_timer
1808 .reset(freemem_dur, Some(freemem_int))
1809 .map_err(Error::ResetTimerFd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001810 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001811 }
1812 Token::LowmemTimer => {
1813 // Acknowledge the timer.
1814 lowmem_timer.wait().map_err(Error::TimerFd)?;
1815
David Tolnay64cd5ea2019-04-15 15:56:35 -07001816 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001817 // Start polling the lowmem device again.
Zach Reizner55a9e502018-10-03 10:22:32 -07001818 poll_ctx
1819 .add(low_mem, Token::LowMemory)
1820 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001821 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001822 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001823 Token::VmControlServer => {
1824 if let Some(socket_server) = &control_server_socket {
1825 match socket_server.accept() {
1826 Ok(socket) => {
1827 poll_ctx
1828 .add(
1829 &socket,
1830 Token::VmControl {
1831 index: control_sockets.len(),
1832 },
1833 )
1834 .map_err(Error::PollContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001835 control_sockets
1836 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001837 }
1838 Err(e) => error!("failed to accept socket: {}", e),
1839 }
1840 }
1841 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001842 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001843 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001844 match socket {
1845 TaggedControlSocket::Vm(socket) => match socket.recv() {
1846 Ok(request) => {
1847 let mut run_mode_opt = None;
1848 let response = request.execute(
1849 &mut run_mode_opt,
1850 &balloon_host_socket,
1851 disk_host_sockets,
1852 &usb_control_socket,
1853 );
1854 if let Err(e) = socket.send(&response) {
1855 error!("failed to send VmResponse: {}", e);
1856 }
1857 if let Some(run_mode) = run_mode_opt {
1858 info!("control socket changed run mode to {}", run_mode);
1859 match run_mode {
1860 VmRunMode::Exiting => {
1861 break 'poll;
1862 }
1863 other => {
1864 run_mode_arc.set_and_notify(other);
1865 for handle in &vcpu_handles {
1866 let _ = handle.kill(SIGRTMIN() + 0);
1867 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001868 }
1869 }
1870 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001871 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001872 Err(e) => {
1873 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1874 vm_control_indices_to_remove.push(index);
1875 } else {
1876 error!("failed to recv VmRequest: {}", e);
1877 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001878 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001879 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07001880 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001881 Ok(request) => {
1882 let response =
1883 request.execute(&mut linux.vm, &mut linux.resources);
1884 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07001885 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07001886 }
1887 }
1888 Err(e) => {
1889 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1890 vm_control_indices_to_remove.push(index);
1891 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07001892 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07001893 }
1894 }
1895 },
Xiong Zhang2515b752019-09-19 10:29:02 +08001896 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
1897 Ok(request) => {
1898 let response =
1899 request.execute(&mut linux.vm, &mut linux.resources);
1900 if let Err(e) = socket.send(&response) {
1901 error!("failed to send VmIrqResponse: {}", e);
1902 }
1903 }
1904 Err(e) => {
1905 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1906 vm_control_indices_to_remove.push(index);
1907 } else {
1908 error!("failed to recv VmIrqRequest: {}", e);
1909 }
1910 }
1911 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001912 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001913 }
1914 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001915 }
1916 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001917
Zach Reizner5bed0d22018-03-28 02:31:11 -07001918 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08001919 match event.token() {
1920 Token::Exit => {}
Zach Reiznera60744b2019-02-13 17:33:32 -08001921 Token::ChildSignal => {}
1922 Token::CheckAvailableMemory => {}
1923 Token::LowMemory => {}
1924 Token::LowmemTimer => {}
1925 Token::VmControlServer => {}
1926 Token::VmControl { index } => {
1927 // It's possible more data is readable and buffered while the socket is hungup,
1928 // so don't delete the socket from the poll context until we're sure all the
1929 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07001930 match control_sockets
1931 .get(index)
1932 .map(|s| s.as_ref().get_readable_bytes())
1933 {
Zach Reiznera60744b2019-02-13 17:33:32 -08001934 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
1935 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
1936 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07001937 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001938 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001939 }
1940 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001941
1942 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08001943 // preserves correct indexes as each element is removed.
Zach Reiznera60744b2019-02-13 17:33:32 -08001944 vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
1945 vm_control_indices_to_remove.dedup();
1946 for index in vm_control_indices_to_remove {
Zide Chen89584072019-11-14 10:33:51 -08001947 // Delete the socket from the `poll_ctx` synchronously. Otherwise, the kernel will do
1948 // this automatically when the FD inserted into the `poll_ctx` is closed after this
1949 // if-block, but this removal can be deferred unpredictably. In some instances where the
1950 // system is under heavy load, we can even get events returned by `poll_ctx` for an FD
1951 // that has already been closed. Because the token associated with that spurious event
1952 // now belongs to a different socket, the control loop will start to interact with
1953 // sockets that might not be ready to use. This can cause incorrect hangup detection or
1954 // blocking on a socket that will never be ready. See also: crbug.com/1019986
1955 if let Some(socket) = control_sockets.get(index) {
1956 poll_ctx.delete(socket).map_err(Error::PollContextDelete)?;
1957 }
1958
1959 // This line implicitly drops the socket at `index` when it gets returned by
1960 // `swap_remove`. After this line, the socket at `index` is not the one from
1961 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
1962 // use `poll_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08001963 control_sockets.swap_remove(index);
1964 if let Some(socket) = control_sockets.get(index) {
1965 poll_ctx
Xiong Zhang44bb3dd2019-04-23 17:09:50 +08001966 .modify(
1967 socket,
1968 WatchingEvents::empty().set_read(),
1969 Token::VmControl { index },
1970 )
Zach Reiznera60744b2019-02-13 17:33:32 -08001971 .map_err(Error::PollContextAdd)?;
1972 }
1973 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001974 }
1975
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001976 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
1977 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07001978 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08001979 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001980 Ok(_) => {
1981 if let Err(e) = handle.join() {
1982 error!("failed to join vcpu thread: {:?}", e);
1983 }
1984 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08001985 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08001986 }
1987 }
1988
Daniel Verkamp94c35272019-09-12 13:31:30 -07001989 // Explicitly drop the VM structure here to allow the devices to clean up before the
1990 // control sockets are closed when this function exits.
1991 mem::drop(linux);
1992
Zach Reizner19ad1f32019-12-12 18:58:50 -08001993 stdin()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001994 .set_canon_mode()
1995 .expect("failed to restore canonical mode for terminal");
1996
1997 Ok(())
1998}