blob: fc4c63e756fb25cb7cd9b8ce2f6e6a7fa2cca21f [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07006use std::cmp::min;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
David Tolnayfdac5ed2019-03-08 16:56:14 -08008use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -07009use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080010use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070011use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070012use std::io::{self, stdin, Read};
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070015#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070016use std::num::NonZeroU8;
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090017use std::num::ParseIntError;
Jakub Starond99cd0a2019-04-11 14:09:39 -070018use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080019use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080020use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070021use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070022use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080023use std::thread;
24use std::thread::JoinHandle;
Daniel Prilik22006042019-01-14 14:19:04 -080025use std::time::{Duration, SystemTime, UNIX_EPOCH};
Zach Reizner39aa26b2017-12-12 18:03:23 -080026
David Tolnay41a6f842019-03-01 16:18:44 -080027use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080028
Dylan Reid3082e8e2019-01-07 10:33:48 -080029use audio_streams::DummyStreamSource;
David Tolnay2b089fc2019-03-04 15:33:22 -080030use devices::virtio::{self, VirtioDevice};
Xiong Zhang17b0daf2019-04-23 17:14:50 +080031use devices::{
32 self, HostBackendDeviceProvider, PciDevice, VfioDevice, VfioPciDevice, VirtioPciDevice,
33 XhciController,
34};
Zach Reizner39aa26b2017-12-12 18:03:23 -080035use io_jail::{self, Minijail};
Zach Reizner39aa26b2017-12-12 18:03:23 -080036use kvm::*;
paulhsiaf052cfe2019-01-22 15:22:25 +080037use libcras::CrasClient;
Zach Reiznera60744b2019-02-13 17:33:32 -080038use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080039use net_util::{Error as NetError, MacAddress, Tap};
Daniel Prilik22006042019-01-14 14:19:04 -080040use rand_ish::SimpleRng;
David Tolnay3df35522019-03-11 12:36:30 -070041use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080042use resources::{Alloc, MmioType, SystemAllocator};
Zach Reizner6a8fdd92019-01-16 14:38:41 -080043use sync::{Condvar, Mutex};
Jakub Starond99cd0a2019-04-11 14:09:39 -070044use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Jakub Starona3411ea2019-04-24 10:55:25 -070045
Zach Reiznera60744b2019-02-13 17:33:32 -080046use sys_util::{
David Tolnay633426a2019-04-12 12:18:35 -070047 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060048 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
49 set_cpu_affinity, validate_raw_fd, warn, EventFd, FlockOperation, GuestAddress, GuestMemory,
50 Killable, MemoryMapping, PollContext, PollToken, Protection, SignalFd, Terminal, TimerFd,
51 WatchingEvents, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080052};
Jason D. Clinton865323d2017-09-27 22:04:03 -060053use vhost;
Jakub Starone7c59052019-04-09 12:31:14 -070054use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070055 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -070056 DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, DiskControlResult,
Xiong Zhanga5d248c2019-09-17 14:17:19 -070057 UsbControlSocket, VmControlResponseSocket, VmIrqRequest, VmIrqResponse, VmIrqResponseSocket,
58 VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse,
59 VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070060};
Zach Reizner39aa26b2017-12-12 18:03:23 -080061
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +090062use crate::{Config, DiskOption, Executable, SharedDir, SharedDirKind, TouchDeviceOption};
Zach Reizner39aa26b2017-12-12 18:03:23 -080063
Cody Schuffelen6d1ab502019-05-21 12:12:38 -070064use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage};
Sonny Raoed517d12018-02-13 22:09:43 -080065
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080066#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
67use aarch64::AArch64 as Arch;
Zach Reizner55a9e502018-10-03 10:22:32 -070068#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
69use x86_64::X8664arch as Arch;
Zach Reizner39aa26b2017-12-12 18:03:23 -080070
Lepton Wu60893882018-11-21 11:06:18 -080071#[cfg(feature = "gpu-forward")]
David Tolnayaecf9a42019-04-11 14:30:00 -070072use render_node_forward::*;
// Stand-in used when the "gpu-forward" feature is disabled, so that `RenderNodeHost` still names
// a type. NOTE(review): with the feature enabled the name presumably comes from the
// `render_node_forward` glob import -- confirm.
#[cfg(not(feature = "gpu-forward"))]
type RenderNodeHost = ();
75
David Tolnay3df35522019-03-11 12:36:30 -070076#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070077#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080078pub enum Error {
Lepton Wu60893882018-11-21 11:06:18 -080079 AddGpuDeviceMemory(sys_util::Error),
Jakub Starona3411ea2019-04-24 10:55:25 -070080 AddPmemDeviceMemory(sys_util::Error),
Lepton Wu60893882018-11-21 11:06:18 -080081 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070082 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080083 BalloonDeviceNew(virtio::BalloonError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080084 BlockDeviceNew(sys_util::Error),
Mark Ryan6ed5aea2018-04-20 13:52:35 +010085 BlockSignal(sys_util::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080086 BuildVm(<Arch as LinuxArch>::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080087 ChownTpmStorage(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080088 CloneEventFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080089 CreateCrasClient(libcras::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -070090 CreateDiskError(disk::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080091 CreateEventFd(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070092 CreatePollContext(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080093 CreateSignalFd(sys_util::SignalFdError),
94 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -080095 CreateTapDevice(NetError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -070096 CreateTimerFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080097 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -080098 CreateUsbProvider(devices::usb::host_backend::error::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +080099 CreateVfioDevice(devices::vfio::VfioError),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800100 DeviceJail(io_jail::Error),
101 DevicePivotRoot(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800102 Disk(io::Error),
Stephen Barberc79de2d2018-02-21 14:17:27 -0800103 DiskImageLock(sys_util::Error),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800104 DropCapabilities(sys_util::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900105 FsDeviceNew(virtio::fs::Error),
106 GetMaxOpenFiles(io::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800107 InputDeviceNew(virtio::InputError),
108 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700109 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700110 InvalidWaylandPath,
David Tolnayfd0971d2019-03-04 17:15:57 -0800111 IoJail(io_jail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800112 LoadKernel(Box<dyn StdError>),
David Tolnay2b089fc2019-03-04 15:33:22 -0800113 NetDeviceNew(virtio::NetError),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800114 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700115 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800116 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800117 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800118 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800119 P9DeviceNew(virtio::P9Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900120 ParseMaxOpenFiles(ParseIntError),
Lepton Wu39133a02019-02-27 12:42:29 -0800121 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700122 PmemDeviceImageTooBig,
123 PmemDeviceNew(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -0700124 PollContextAdd(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700125 PollContextDelete(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700126 ReadLowmemAvailable(io::Error),
127 ReadLowmemMargin(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700128 RegisterBalloon(arch::DeviceRegistrationError),
129 RegisterBlock(arch::DeviceRegistrationError),
130 RegisterGpu(arch::DeviceRegistrationError),
131 RegisterNet(arch::DeviceRegistrationError),
132 RegisterP9(arch::DeviceRegistrationError),
133 RegisterRng(arch::DeviceRegistrationError),
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100134 RegisterSignalHandler(sys_util::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700135 RegisterWayland(arch::DeviceRegistrationError),
Lepton Wu60893882018-11-21 11:06:18 -0800136 ReserveGpuMemory(sys_util::MmapError),
137 ReserveMemory(sys_util::Error),
Jakub Starona3411ea2019-04-24 10:55:25 -0700138 ReservePmemMemory(sys_util::MmapError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700139 ResetTimerFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800140 RngDeviceNew(virtio::RngError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800141 SettingGidMap(io_jail::Error),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900142 SettingMaxOpenFiles(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800143 SettingUidMap(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800144 SignalFd(sys_util::SignalFdError),
145 SpawnVcpu(io::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700146 TimerFd(sys_util::Error),
Chirantan Ekbote2d292332018-11-16 11:35:24 -0800147 ValidateRawFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800148 VhostNetDeviceNew(virtio::vhost::Error),
149 VhostVsockDeviceNew(virtio::vhost::Error),
Daniel Verkamp56f283b2018-10-05 11:40:59 -0700150 VirtioPciDev(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800151 WaylandDeviceNew(sys_util::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800152}
153
David Tolnayc69f9752019-03-01 18:07:56 -0800154impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700155 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800156 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800157 use self::Error::*;
158
David Tolnay3df35522019-03-11 12:36:30 -0700159 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800160 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800161 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700162 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800163 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700164 AllocatePmemDeviceAddress(e) => {
165 write!(f, "failed to allocate memory for pmem device: {}", e)
166 }
David Tolnayc69f9752019-03-01 18:07:56 -0800167 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
168 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
169 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800170 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800171 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800172 CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800173 CreateCrasClient(e) => write!(f, "failed to create cras client: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700174 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800175 CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
176 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
177 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
178 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
179 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
180 CreateTimerFd(e) => write!(f, "failed to create timerfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800181 CreateTpmStorage(p, e) => {
182 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
183 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800184 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800185 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800186 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
187 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
188 Disk(e) => write!(f, "failed to load disk image: {}", e),
189 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800190 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900191 FsDeviceNew(e) => write!(f, "failed to create fs device: {}", e),
192 GetMaxOpenFiles(e) => write!(f, "failed to get max number of open files: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700193 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
194 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800195 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
196 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800197 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800198 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800199 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800200 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800201 f,
202 "failed to open android fstab file {}: {}",
203 p.display(),
204 e
205 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700206 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700207 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
208 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800209 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800210 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900211 ParseMaxOpenFiles(e) => write!(f, "failed to parse max number of open files: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800212 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700213 PmemDeviceImageTooBig => {
214 write!(f, "failed to create pmem device: pmem device image too big")
215 }
216 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800217 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
218 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800219 ReadLowmemAvailable(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700220 f,
221 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
222 e
223 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800224 ReadLowmemMargin(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700225 f,
226 "failed to read /sys/kernel/mm/chromeos-low_mem/margin: {}",
227 e
228 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800229 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
230 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
231 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
232 RegisterNet(e) => write!(f, "error registering net device: {}", e),
233 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
234 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
235 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
236 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800237 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
238 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700239 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800240 ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
241 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800242 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900243 SettingMaxOpenFiles(e) => write!(f, "error setting max open files: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800244 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
245 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
246 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
247 TimerFd(e) => write!(f, "failed to read timer fd: {}", e),
248 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
249 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
250 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
251 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
252 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800253 }
254 }
255}
256
David Tolnayfd0971d2019-03-04 17:15:57 -0800257impl From<io_jail::Error> for Error {
258 fn from(err: io_jail::Error) -> Self {
259 Error::IoJail(err)
260 }
261}
262
David Tolnayc69f9752019-03-01 18:07:56 -0800263impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700264
Zach Reizner39aa26b2017-12-12 18:03:23 -0800265type Result<T> = std::result::Result<T, Error>;
266
Jakub Starond99cd0a2019-04-11 14:09:39 -0700267enum TaggedControlSocket {
268 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700269 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800270 VmIrq(VmIrqResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700271}
272
273impl AsRef<UnixSeqpacket> for TaggedControlSocket {
274 fn as_ref(&self) -> &UnixSeqpacket {
275 use self::TaggedControlSocket::*;
276 match &self {
277 Vm(ref socket) => socket,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700278 VmMemory(ref socket) => socket,
Xiong Zhang2515b752019-09-19 10:29:02 +0800279 VmIrq(ref socket) => socket,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700280 }
281 }
282}
283
284impl AsRawFd for TaggedControlSocket {
285 fn as_raw_fd(&self) -> RawFd {
286 self.as_ref().as_raw_fd()
287 }
288}
289
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900290fn get_max_open_files() -> Result<libc::rlim_t> {
291 let mut buf = String::with_capacity(32);
292 File::open("/proc/sys/fs/file-max")
293 .and_then(|mut f| f.read_to_string(&mut buf))
294 .map_err(Error::GetMaxOpenFiles)?;
295
296 Ok(buf.trim().parse().map_err(Error::ParseMaxOpenFiles)?)
297}
298
Zach Reizner44863792019-06-26 14:22:08 -0700299fn create_base_minijail(
300 root: &Path,
301 log_failures: bool,
302 seccomp_policy: &Path,
303) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800304 // All child jails run in a new user namespace without any users mapped,
305 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800306 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800307 j.namespace_pids();
308 j.namespace_user();
309 j.namespace_user_disable_setgroups();
310 // Don't need any capabilities.
311 j.use_caps(0);
312 // Create a new mount namespace with an empty root FS.
313 j.namespace_vfs();
David Tolnay5bbbf612018-12-01 17:49:30 -0800314 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800315 // Run in an empty network namespace.
316 j.namespace_net();
317 // Apply the block device seccomp policy.
318 j.no_new_privs();
Stephen Barber3b1d8a52018-01-06 17:34:51 -0800319 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
320 // the entire device process if a worker thread commits a seccomp violation.
321 j.set_seccomp_filter_tsync();
Zach Reizner44863792019-06-26 14:22:08 -0700322 if log_failures {
323 j.log_seccomp_filter_failures();
324 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800325 j.parse_seccomp_filters(seccomp_policy)
David Tolnay5bbbf612018-12-01 17:49:30 -0800326 .map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800327 j.use_seccomp_filter();
328 // Don't do init setup.
329 j.run_as_init();
330 Ok(j)
331}
332
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800333fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700334 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800335 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
336 // A directory for a jailed device's pivot root.
337 let root_path = Path::new(pivot_root);
338 if !root_path.exists() {
339 return Err(Error::PivotRootDoesntExist(pivot_root));
340 }
341 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Zach Reizner44863792019-06-26 14:22:08 -0700342 Ok(Some(create_base_minijail(
343 root_path,
344 cfg.seccomp_log_failures,
345 &policy_path,
346 )?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800347 } else {
348 Ok(None)
349 }
350}
351
David Tolnayfd0971d2019-03-04 17:15:57 -0800352type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800353
354fn create_block_device(
355 cfg: &Config,
356 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700357 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800358) -> DeviceResult {
359 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
360 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
361 // Safe because we will validate |raw_fd|.
362 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
363 } else {
364 OpenOptions::new()
365 .read(true)
366 .write(!disk.read_only)
367 .open(&disk.path)
368 .map_err(Error::Disk)?
369 };
370 // Lock the disk image to prevent other crosvm instances from using it.
371 let lock_op = if disk.read_only {
372 FlockOperation::LockShared
373 } else {
374 FlockOperation::LockExclusive
375 };
376 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
377
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700378 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
Daniel Verkampe73c80f2019-11-08 10:11:16 -0800379 let dev = virtio::Block::new(
380 disk_file,
381 disk.read_only,
382 disk.sparse,
383 Some(disk_device_socket),
384 )
385 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800386
387 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700388 dev: Box::new(dev),
David Tolnay2b089fc2019-03-04 15:33:22 -0800389 jail: simple_jail(&cfg, "block_device.policy")?,
390 })
391}
392
393fn create_rng_device(cfg: &Config) -> DeviceResult {
394 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
395
396 Ok(VirtioDeviceStub {
397 dev: Box::new(dev),
398 jail: simple_jail(&cfg, "rng_device.policy")?,
399 })
400}
401
/// Creates a virtio TPM device together with its storage directory.
///
/// When a jail is in use, a 20 KB tmpfs is mounted at the jail's root, the directory
/// "/run/vm/tpm.<pid>" is created, chowned to the ids returned by `add_crosvm_user_to_jail`,
/// and bind-mounted into the jail. Without a jail the storage path is "/tmp/tpm-simulator".
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;
    use sys_util::chown;

    let mut tpm_jail = simple_jail(cfg, "tpm_device.policy")?;

    let tpm_storage: PathBuf = match &mut tpm_jail {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            let mount_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                mount_flags,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            // The storage directory is named after this process's pid.
            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage_dir = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage_dir)
                .map_err(|e| Error::CreateTpmStorage(storage_dir.clone(), e))?;

            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage_dir, &storage_dir, true)?;
            storage_dir
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(virtio::Tpm::new(tpm_storage)),
        jail: tpm_jail,
    })
}
450
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800451fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
452 let socket = create_input_socket(&single_touch_spec.path).map_err(|e| {
453 error!("failed configuring virtio single touch: {:?}", e);
454 e
455 })?;
456
457 let dev = virtio::new_single_touch(socket, single_touch_spec.width, single_touch_spec.height)
458 .map_err(Error::InputDeviceNew)?;
459 Ok(VirtioDeviceStub {
460 dev: Box::new(dev),
461 jail: simple_jail(&cfg, "input_device.policy")?,
462 })
463}
464
465fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800466 let socket = create_input_socket(&trackpad_spec.path).map_err(|e| {
467 error!("failed configuring virtio trackpad: {}", e);
468 e
469 })?;
470
471 let dev = virtio::new_trackpad(socket, trackpad_spec.width, trackpad_spec.height)
472 .map_err(Error::InputDeviceNew)?;
473
474 Ok(VirtioDeviceStub {
475 dev: Box::new(dev),
476 jail: simple_jail(&cfg, "input_device.policy")?,
477 })
478}
479
480fn create_mouse_device(cfg: &Config, mouse_socket: &Path) -> DeviceResult {
481 let socket = create_input_socket(&mouse_socket).map_err(|e| {
482 error!("failed configuring virtio mouse: {}", e);
483 e
484 })?;
485
486 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
487
488 Ok(VirtioDeviceStub {
489 dev: Box::new(dev),
490 jail: simple_jail(&cfg, "input_device.policy")?,
491 })
492}
493
494fn create_keyboard_device(cfg: &Config, keyboard_socket: &Path) -> DeviceResult {
495 let socket = create_input_socket(&keyboard_socket).map_err(|e| {
496 error!("failed configuring virtio keyboard: {}", e);
497 e
498 })?;
499
500 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
501
502 Ok(VirtioDeviceStub {
503 dev: Box::new(dev),
504 jail: simple_jail(&cfg, "input_device.policy")?,
505 })
506}
507
508fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
509 let dev_file = OpenOptions::new()
510 .read(true)
511 .write(true)
512 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800513 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800514
515 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
516
517 Ok(VirtioDeviceStub {
518 dev: Box::new(dev),
519 jail: simple_jail(&cfg, "input_device.policy")?,
520 })
521}
522
Jakub Staron1f828d72019-04-11 12:49:29 -0700523fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800524 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
525
526 Ok(VirtioDeviceStub {
527 dev: Box::new(dev),
528 jail: simple_jail(&cfg, "balloon_device.policy")?,
529 })
530}
531
532fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
533 // Safe because we ensure that we get a unique handle to the fd.
534 let tap = unsafe {
535 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
536 .map_err(Error::CreateTapDevice)?
537 };
538
539 let dev = virtio::Net::from(tap).map_err(Error::NetDeviceNew)?;
540
541 Ok(VirtioDeviceStub {
542 dev: Box::new(dev),
543 jail: simple_jail(&cfg, "net_device.policy")?,
544 })
545}
546
547fn create_net_device(
548 cfg: &Config,
549 host_ip: Ipv4Addr,
550 netmask: Ipv4Addr,
551 mac_address: MacAddress,
552 mem: &GuestMemory,
553) -> DeviceResult {
554 let dev = if cfg.vhost_net {
555 let dev =
556 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
557 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800558 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800559 } else {
560 let dev =
561 virtio::Net::<Tap>::new(host_ip, netmask, mac_address).map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800562 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800563 };
564
565 let policy = if cfg.vhost_net {
566 "vhost_net_device.policy"
567 } else {
568 "net_device.policy"
569 };
570
571 Ok(VirtioDeviceStub {
572 dev,
573 jail: simple_jail(&cfg, policy)?,
574 })
575}
576
// Creates the virtio GPU device and, if `simple_jail` returns a jail, populates that jail
// with the mounts set up below.
//
// The display backends handed to the device are, in order: Wayland (only when
// `wayland_socket_path` is given), X, and Null.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &EventFd,
    gpu_device_socket: VmMemoryControlRequestSocket,
    gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
    wayland_socket_path: Option<PathBuf>,
    x_display: Option<String>,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");

    let mut display_backends = Vec::with_capacity(3);
    if let Some(host_socket) = wayland_socket_path.as_ref() {
        // When sandboxed, the device sees the socket at its bind-mounted location instead of
        // the host path.
        let wayland_path = if cfg.sandbox {
            jailed_wayland_path.to_owned()
        } else {
            host_socket.to_owned()
        };
        display_backends.push(virtio::DisplayBackend::Wayland(Some(wayland_path)));
    }
    display_backends.push(virtio::DisplayBackend::X(x_display));
    display_backends.push(virtio::DisplayBackend::Null);

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEventFd)?,
        Some(gpu_device_socket),
        NonZeroU8::new(1).unwrap(), // number of scanouts
        gpu_sockets,
        display_backends,
    );

    let mut jail = simple_jail(&cfg, "gpu_device.policy")?;
    if let Some(gpu_jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        gpu_jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;

        // Device nodes required for DRM.
        let sys_dev_char_path = Path::new("/sys/dev/char");
        gpu_jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
        let sys_devices_path = Path::new("/sys/devices");
        gpu_jail.mount_bind(sys_devices_path, sys_devices_path, false)?;
        let drm_dri_path = Path::new("/dev/dri");
        gpu_jail.mount_bind(drm_dri_path, drm_dri_path, false)?;

        // If the ARM specific devices exist on the host, bind mount them in.
        let mali0_path = Path::new("/dev/mali0");
        if mali0_path.exists() {
            gpu_jail.mount_bind(mali0_path, mali0_path, true)?;
        }

        let pvr_sync_path = Path::new("/dev/pvr_sync");
        if pvr_sync_path.exists() {
            gpu_jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
        }

        // Libraries that are required when mesa drivers are dynamically loaded. Only the
        // directories that exist on the host are mounted.
        let lib_dirs = &["/usr/lib", "/usr/lib64", "/lib", "/lib64"];
        let existing_lib_dirs = lib_dirs
            .iter()
            .map(|dir| Path::new(dir))
            .filter(|dir_path| dir_path.exists());
        for dir_path in existing_lib_dirs {
            gpu_jail.mount_bind(dir_path, dir_path, false)?;
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        if let Some(host_socket) = wayland_socket_path {
            gpu_jail.mount_bind(host_socket.as_path(), jailed_wayland_path, true)?;
        }

        add_crosvm_user_to_jail(gpu_jail, "gpu")?;

        // pvr driver requires read access to /proc/self/task/*/comm.
        let proc_path = Path::new("/proc");
        gpu_jail.mount(
            proc_path,
            proc_path,
            "proc",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
        )?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
679
680fn create_wayland_device(
681 cfg: &Config,
682 socket_path: &Path,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700683 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800684 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
685) -> DeviceResult {
686 let wayland_socket_dir = socket_path.parent().ok_or(Error::InvalidWaylandPath)?;
687 let wayland_socket_name = socket_path.file_name().ok_or(Error::InvalidWaylandPath)?;
688 let jailed_wayland_dir = Path::new("/wayland");
689 let jailed_wayland_path = jailed_wayland_dir.join(wayland_socket_name);
690
691 let dev = virtio::Wl::new(
Lepton Wu9105e9f2019-03-14 11:38:31 -0700692 if cfg.sandbox {
David Tolnay2b089fc2019-03-04 15:33:22 -0800693 &jailed_wayland_path
694 } else {
695 socket_path
696 },
697 socket,
698 resource_bridge,
699 )
700 .map_err(Error::WaylandDeviceNew)?;
701
702 let jail = match simple_jail(&cfg, "wl_device.policy")? {
703 Some(mut jail) => {
704 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
705 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
706 jail.mount_with_data(
707 Path::new("none"),
708 Path::new("/"),
709 "tmpfs",
710 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
711 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800712 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800713
714 // Bind mount the wayland socket's directory into jail's root. This is necessary since
715 // each new wayland context must open() the socket. If the wayland socket is ever
716 // destroyed and remade in the same host directory, new connections will be possible
717 // without restarting the wayland device.
David Tolnayfd0971d2019-03-04 17:15:57 -0800718 jail.mount_bind(wayland_socket_dir, jailed_wayland_dir, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800719
720 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
721
722 Some(jail)
723 }
724 None => None,
725 };
726
727 Ok(VirtioDeviceStub {
728 dev: Box::new(dev),
729 jail,
730 })
731}
732
733fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
734 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
735
736 Ok(VirtioDeviceStub {
737 dev: Box::new(dev),
738 jail: simple_jail(&cfg, "vhost_vsock_device.policy")?,
739 })
740}
741
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +0900742fn create_fs_device(
743 cfg: &Config,
744 uid_map: &str,
745 gid_map: &str,
746 src: &Path,
747 tag: &str,
748 fs_cfg: virtio::fs::passthrough::Config,
749) -> DeviceResult {
750 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
751
752 if cfg.sandbox {
753 j.namespace_pids();
754 j.namespace_user();
755 j.namespace_user_disable_setgroups();
756 j.uidmap(uid_map).map_err(Error::SettingUidMap)?;
757 j.gidmap(gid_map).map_err(Error::SettingGidMap)?;
758
759 // Run in an empty network namespace.
760 j.namespace_net();
761
762 j.no_new_privs();
763
764 // TODO(chirantan): Enable seccomp
765 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
766 // the entire device process if a worker thread commits a seccomp violation.
767 // let seccomp_policy = cfg.seccomp_policy_dir.join("9p_device.policy");
768 // j.set_seccomp_filter_tsync();
769 // if cfg.seccomp_log_failures {
770 // j.log_seccomp_filter_failures();
771 // }
772 // j.parse_seccomp_filters(&seccomp_policy)
773 // .map_err(Error::DeviceJail)?;
774 // j.use_seccomp_filter();
775
776 // Don't do init setup.
777 j.run_as_init();
778 }
779
780 // Create a new mount namespace with the source directory as the root. We need this even when
781 // sandboxing is disabled as the server relies on the host kernel to prevent path traversals
782 // from leaking out of the shared directory.
783 j.namespace_vfs();
784 j.enter_pivot_root(src).map_err(Error::DevicePivotRoot)?;
785
786 // The file server opens a lot of fds and needs a really high open file limit.
787 let max_open_files = get_max_open_files()?;
788 j.set_rlimit(libc::RLIMIT_NOFILE, max_open_files, max_open_files)
789 .map_err(Error::SettingMaxOpenFiles)?;
790
791 // TODO(chirantan): Use more than one worker once the kernel driver has been fixed to not panic
792 // when num_queues > 1.
793 let dev = virtio::fs::Fs::new(tag, 1, fs_cfg).map_err(Error::FsDeviceNew)?;
794
795 Ok(VirtioDeviceStub {
796 dev: Box::new(dev),
797 jail: Some(j),
798 })
799}
800
David Tolnay2b089fc2019-03-04 15:33:22 -0800801fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult {
802 let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? {
803 Some(mut jail) => {
804 // The shared directory becomes the root of the device's file system.
805 let root = Path::new("/");
David Tolnayfd0971d2019-03-04 17:15:57 -0800806 jail.mount_bind(src, root, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800807
808 // Set the uid/gid for the jailed process, and give a basic id map. This
809 // is required for the above bind mount to work.
810 jail.change_uid(chronos.uid);
811 jail.change_gid(chronos.gid);
812 jail.uidmap(&format!("{0} {0} 1", chronos.uid))
813 .map_err(Error::SettingUidMap)?;
814 jail.gidmap(&format!("{0} {0} 1", chronos.gid))
815 .map_err(Error::SettingGidMap)?;
816
817 (Some(jail), root)
818 }
819 None => {
820 // There's no bind mount so we tell the server to treat the source directory as the
David Tolnay9deb7d72019-03-05 18:25:44 -0800821 // root.
David Tolnay2b089fc2019-03-04 15:33:22 -0800822 (None, src)
823 }
824 };
825
826 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
827
828 Ok(VirtioDeviceStub {
829 dev: Box::new(dev),
830 jail,
831 })
832}
833
Jakub Starona3411ea2019-04-24 10:55:25 -0700834fn create_pmem_device(
835 cfg: &Config,
836 vm: &mut Vm,
837 resources: &mut SystemAllocator,
838 disk: &DiskOption,
839 index: usize,
840) -> DeviceResult {
841 let fd = OpenOptions::new()
842 .read(true)
843 .write(!disk.read_only)
844 .open(&disk.path)
845 .map_err(Error::Disk)?;
846
847 let image_size = {
848 let metadata = std::fs::metadata(&disk.path).map_err(Error::Disk)?;
849 metadata.len()
850 };
851
852 let protection = {
853 if disk.read_only {
854 Protection::read()
855 } else {
856 Protection::read_write()
857 }
858 };
859
860 let memory_mapping = {
861 // Conversion from u64 to usize may fail on 32bit system.
862 let image_size = usize::try_from(image_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
863
864 MemoryMapping::from_fd_offset_protection(&fd, image_size, 0, protection)
865 .map_err(Error::ReservePmemMemory)?
866 };
867
868 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +0800869 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -0700870 .allocate_with_align(
871 image_size,
872 Alloc::PmemDevice(index),
873 format!("pmem_disk_image_{}", index),
874 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
875 128 * 1024 * 1024, /* 128 MiB */
876 )
877 .map_err(Error::AllocatePmemDeviceAddress)?;
878
Xiong Zhang383b3b52019-10-30 14:59:26 +0800879 vm.add_mmio_memory(
Jakub Starona3411ea2019-04-24 10:55:25 -0700880 GuestAddress(mapping_address),
881 memory_mapping,
882 /* read_only = */ disk.read_only,
883 /* log_dirty_pages = */ false,
884 )
885 .map_err(Error::AddPmemDeviceMemory)?;
886
887 let dev = virtio::Pmem::new(fd, GuestAddress(mapping_address), image_size)
888 .map_err(Error::PmemDeviceNew)?;
889
890 Ok(VirtioDeviceStub {
891 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Jakub Staroncc91fc82019-06-10 14:00:07 -0700892 jail: simple_jail(&cfg, "pmem_device.policy")?,
Jakub Starona3411ea2019-04-24 10:55:25 -0700893 })
894}
895
Dmitry Torokhovee42b8c2019-05-27 11:14:20 -0700896// gpu_device_socket is not used when GPU support is disabled.
897#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
David Tolnay2b089fc2019-03-04 15:33:22 -0800898fn create_virtio_devices(
899 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -0700900 mem: &GuestMemory,
Jakub Starona3411ea2019-04-24 10:55:25 -0700901 vm: &mut Vm,
902 resources: &mut SystemAllocator,
Zach Reizner55a9e502018-10-03 10:22:32 -0700903 _exit_evt: &EventFd,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700904 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -0700905 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -0700906 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -0700907 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800908) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700909 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800910
Zach Reizner8fb52112017-12-13 16:04:39 -0800911 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800912 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -0800913 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800914 }
915
Jakub Starona3411ea2019-04-24 10:55:25 -0700916 for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
917 devs.push(create_pmem_device(cfg, vm, resources, pmem_disk, index)?);
918 }
919
David Tolnay2b089fc2019-03-04 15:33:22 -0800920 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800921
David Tolnayde6b29a2018-12-20 11:49:46 -0800922 #[cfg(feature = "tpm")]
923 {
David Tolnay43f8e212019-02-13 17:28:16 -0800924 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -0800925 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -0800926 }
David Tolnayde6b29a2018-12-20 11:49:46 -0800927 }
928
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800929 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
930 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
931 }
932
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800933 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -0800934 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800935 }
936
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800937 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -0800938 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800939 }
940
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800941 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -0800942 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800943 }
944
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800945 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -0800946 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800947 }
948
David Tolnay2b089fc2019-03-04 15:33:22 -0800949 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -0800950
Zach Reizner39aa26b2017-12-12 18:03:23 -0800951 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800952 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -0800953 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -0800954 }
955
David Tolnay2b089fc2019-03-04 15:33:22 -0800956 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
957 (cfg.host_ip, cfg.netmask, cfg.mac_address)
958 {
959 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800960 }
961
David Tolnayfa701712019-02-13 16:42:54 -0800962 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900963 let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();
964
965 if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() {
966 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
967 let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;
968
969 #[cfg(feature = "gpu")]
970 {
971 if cfg.gpu {
972 let (wl_socket, gpu_socket) =
973 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
974 resource_bridges.push(gpu_socket);
975 wl_resource_bridge = Some(wl_socket);
976 }
977 }
978
979 devs.push(create_wayland_device(
980 cfg,
981 wayland_socket_path,
982 wayland_device_socket,
983 wl_resource_bridge,
984 )?);
985 }
David Tolnayfa701712019-02-13 16:42:54 -0800986
Zach Reizner3a8100a2017-09-13 19:15:43 -0700987 #[cfg(feature = "gpu")]
988 {
989 if cfg.gpu {
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700990 devs.push(create_gpu_device(
991 cfg,
992 _exit_evt,
993 gpu_device_socket,
994 resource_bridges,
995 cfg.wayland_socket_path.clone(),
996 cfg.x_display.clone(),
997 )?);
Zach Reizner3a8100a2017-09-13 19:15:43 -0700998 }
999 }
1000
Zach Reizneraa575662018-08-15 10:46:32 -07001001 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -08001002 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -07001003 }
1004
David Tolnayfd0971d2019-03-04 17:15:57 -08001005 let chronos = get_chronos_ids();
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001006 for shared_dir in &cfg.shared_dirs {
1007 let SharedDir {
1008 src,
1009 tag,
1010 kind,
1011 uid_map,
1012 gid_map,
1013 cfg: fs_cfg,
1014 } = shared_dir;
David Tolnay2b089fc2019-03-04 15:33:22 -08001015
Chirantan Ekbotebd4723b2019-07-17 10:50:30 +09001016 let dev = match kind {
1017 SharedDirKind::FS => create_fs_device(cfg, uid_map, gid_map, src, tag, fs_cfg.clone())?,
1018 SharedDirKind::P9 => create_9p_device(cfg, chronos, src, tag)?,
1019 };
1020 devs.push(dev);
David Tolnay2b089fc2019-03-04 15:33:22 -08001021 }
1022
1023 Ok(devs)
1024}
1025
1026fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -06001027 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -08001028 mem: &GuestMemory,
Jakub Starona3411ea2019-04-24 10:55:25 -07001029 vm: &mut Vm,
1030 resources: &mut SystemAllocator,
David Tolnay2b089fc2019-03-04 15:33:22 -08001031 exit_evt: &EventFd,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001032 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -07001033 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001034 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -07001035 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001036 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001037 usb_provider: HostBackendDeviceProvider,
David Tolnayfdac5ed2019-03-08 16:56:14 -08001038) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -08001039 let stubs = create_virtio_devices(
1040 &cfg,
1041 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -07001042 vm,
1043 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -08001044 exit_evt,
1045 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001046 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -08001047 balloon_device_socket,
1048 disk_device_sockets,
1049 )?;
1050
1051 let mut pci_devices = Vec::new();
1052
1053 for stub in stubs {
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001054 let dev = if stub.dev.msix_vectors() > 0 {
1055 let (msi_host_socket, msi_device_socket) =
1056 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1057 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
1058
1059 VirtioPciDevice::new(mem.clone(), stub.dev, Some(msi_device_socket))
1060 .map_err(Error::VirtioPciDev)?
1061 } else {
1062 VirtioPciDevice::new(mem.clone(), stub.dev, None).map_err(Error::VirtioPciDev)?
1063 };
1064
David Tolnayfdac5ed2019-03-08 16:56:14 -08001065 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -08001066 pci_devices.push((dev, stub.jail));
1067 }
1068
1069 if cfg.cras_audio {
paulhsia580d4182019-05-24 16:53:55 +08001070 let mut server = Box::new(CrasClient::new().map_err(Error::CreateCrasClient)?);
1071 if cfg.cras_capture {
1072 server.enable_cras_capture();
1073 }
David Tolnay2b089fc2019-03-04 15:33:22 -08001074 let cras_audio = devices::Ac97Dev::new(mem.clone(), server);
1075
1076 pci_devices.push((
1077 Box::new(cras_audio),
1078 simple_jail(&cfg, "cras_audio_device.policy")?,
1079 ));
1080 }
1081
1082 if cfg.null_audio {
1083 let server = Box::new(DummyStreamSource::new());
1084 let null_audio = devices::Ac97Dev::new(mem.clone(), server);
1085
1086 pci_devices.push((
1087 Box::new(null_audio),
1088 simple_jail(&cfg, "null_audio_device.policy")?,
1089 ));
1090 }
Jingkui Wang100e6e42019-03-08 20:41:57 -08001091 // Create xhci controller.
1092 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
1093 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci.policy")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001094
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001095 if cfg.vfio.is_some() {
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001096 let (vfio_host_socket_irq, vfio_device_socket_irq) =
1097 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1098 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_irq));
1099
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001100 let vfio_path = cfg.vfio.as_ref().unwrap().as_path();
Xiong Zhangc554fff2019-04-23 17:14:55 +08001101 let vfiodevice =
Daniel Verkamp04a82c72019-09-24 11:06:58 -07001102 VfioDevice::new(vfio_path, vm, mem.clone()).map_err(Error::CreateVfioDevice)?;
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001103 let vfiopcidevice = Box::new(VfioPciDevice::new(vfiodevice, vfio_device_socket_irq));
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001104 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device.policy")?));
1105 }
1106
David Tolnay2b089fc2019-03-04 15:33:22 -08001107 Ok(pci_devices)
1108}
1109
1110#[derive(Copy, Clone)]
1111struct Ids {
1112 uid: uid_t,
1113 gid: gid_t,
1114}
1115
David Tolnayfd0971d2019-03-04 17:15:57 -08001116fn get_chronos_ids() -> Ids {
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001117 let chronos_user_group = CStr::from_bytes_with_nul(b"chronos\0").unwrap();
David Tolnay2b089fc2019-03-04 15:33:22 -08001118
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001119 let chronos_uid = match get_user_id(&chronos_user_group) {
1120 Ok(u) => u,
1121 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001122 warn!("falling back to current user id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001123 geteuid()
1124 }
1125 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001126
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001127 let chronos_gid = match get_group_id(&chronos_user_group) {
1128 Ok(u) => u,
1129 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001130 warn!("falling back to current group id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001131 getegid()
1132 }
1133 };
1134
David Tolnayfd0971d2019-03-04 17:15:57 -08001135 Ids {
David Tolnay2b089fc2019-03-04 15:33:22 -08001136 uid: chronos_uid,
1137 gid: chronos_gid,
David Tolnayfd0971d2019-03-04 17:15:57 -08001138 }
David Tolnay41a6f842019-03-01 16:18:44 -08001139}
1140
David Tolnay48c48292019-03-01 16:54:25 -08001141// Set the uid/gid for the jailed process and give a basic id map. This is
1142// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001143fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001144 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1145
1146 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1147 Ok(u) => u,
1148 Err(e) => {
1149 warn!("falling back to current user id for {}: {}", feature, e);
1150 geteuid()
1151 }
1152 };
1153
1154 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1155 Ok(u) => u,
1156 Err(e) => {
1157 warn!("falling back to current group id for {}: {}", feature, e);
1158 getegid()
1159 }
1160 };
1161
1162 jail.change_uid(crosvm_uid);
1163 jail.change_gid(crosvm_gid);
1164 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1165 .map_err(Error::SettingUidMap)?;
1166 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1167 .map_err(Error::SettingGidMap)?;
1168
David Tolnay41a6f842019-03-01 16:18:44 -08001169 Ok(Ids {
1170 uid: crosvm_uid,
1171 gid: crosvm_gid,
1172 })
David Tolnay48c48292019-03-01 16:54:25 -08001173}
1174
David Tolnayfd0971d2019-03-04 17:15:57 -08001175fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001176 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001177 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001178 }
1179 let raw_fd = path
1180 .file_name()
1181 .and_then(|fd_osstr| fd_osstr.to_str())
1182 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1183 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001184 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001185}
1186
David Tolnayfd0971d2019-03-04 17:15:57 -08001187fn create_input_socket(path: &Path) -> Result<UnixStream> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001188 if path.parent() == Some(Path::new("/proc/self/fd")) {
1189 // Safe because we will validate |raw_fd|.
1190 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(path)?)) }
1191 } else {
David Tolnayfd0971d2019-03-04 17:15:57 -08001192 UnixStream::connect(path).map_err(Error::InputEventsOpen)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001193 }
1194}
1195
Matt Delco84cf9c02019-10-07 22:38:13 -07001196fn setup_vcpu_signal_handler(use_kvm_signals: bool) -> Result<()> {
1197 if use_kvm_signals {
1198 unsafe {
1199 extern "C" fn handle_signal() {}
1200 // Our signal handler does nothing and is trivially async signal safe.
1201 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1202 .map_err(Error::RegisterSignalHandler)?;
1203 }
1204 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1205 } else {
1206 unsafe {
1207 extern "C" fn handle_signal() {
1208 Vcpu::set_local_immediate_exit(true);
1209 }
1210 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1211 .map_err(Error::RegisterSignalHandler)?;
1212 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001213 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001214 Ok(())
1215}
1216
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001217#[derive(Default)]
1218struct VcpuRunMode {
1219 mtx: Mutex<VmRunMode>,
1220 cvar: Condvar,
1221}
1222
1223impl VcpuRunMode {
1224 fn set_and_notify(&self, new_mode: VmRunMode) {
1225 *self.mtx.lock() = new_mode;
1226 self.cvar.notify_all();
1227 }
1228}
1229
Zach Reizner55a9e502018-10-03 10:22:32 -07001230fn run_vcpu(
Matt Delco84cf9c02019-10-07 22:38:13 -07001231 mut vcpu: Vcpu,
Zach Reizner55a9e502018-10-03 10:22:32 -07001232 cpu_id: u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001233 vcpu_affinity: Vec<usize>,
Zach Reizner55a9e502018-10-03 10:22:32 -07001234 start_barrier: Arc<Barrier>,
1235 io_bus: devices::Bus,
1236 mmio_bus: devices::Bus,
1237 exit_evt: EventFd,
Zach Reizner795355a2019-01-16 17:37:57 -08001238 requires_kvmclock_ctrl: bool,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001239 run_mode_arc: Arc<VcpuRunMode>,
Matt Delco84cf9c02019-10-07 22:38:13 -07001240 use_kvm_signals: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001241) -> Result<JoinHandle<()>> {
Zach Reizner8fb52112017-12-13 16:04:39 -08001242 thread::Builder::new()
1243 .name(format!("crosvm_vcpu{}", cpu_id))
1244 .spawn(move || {
Daniel Verkamp107edb32019-04-05 09:58:48 -07001245 if vcpu_affinity.len() != 0 {
1246 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
1247 error!("Failed to set CPU affinity: {}", e);
1248 }
1249 }
1250
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001251 let mut sig_ok = true;
Matt Delco84cf9c02019-10-07 22:38:13 -07001252 if use_kvm_signals {
1253 match get_blocked_signals() {
1254 Ok(mut v) => {
1255 v.retain(|&x| x != SIGRTMIN() + 0);
1256 if let Err(e) = vcpu.set_signal_mask(&v) {
1257 error!(
1258 "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
1259 cpu_id, e
1260 );
1261 sig_ok = false;
1262 }
1263 }
1264 Err(e) => {
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001265 error!(
Matt Delco84cf9c02019-10-07 22:38:13 -07001266 "Failed to retrieve signal mask for vcpu {} : {}",
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001267 cpu_id, e
1268 );
1269 sig_ok = false;
1270 }
Matt Delco84cf9c02019-10-07 22:38:13 -07001271 };
1272 } else {
1273 vcpu.set_thread_id(SIGRTMIN() + 0);
1274 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001275
Zach Reizner8fb52112017-12-13 16:04:39 -08001276 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001277
David Tolnay8f3a2322018-11-30 17:11:35 -08001278 if sig_ok {
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001279 'vcpu_loop: loop {
1280 let mut interrupted_by_signal = false;
David Tolnay8f3a2322018-11-30 17:11:35 -08001281 match vcpu.run() {
1282 Ok(VcpuExit::IoIn { port, mut size }) => {
1283 let mut data = [0; 8];
1284 if size > data.len() {
1285 error!("unsupported IoIn size of {} bytes", size);
1286 size = data.len();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001287 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001288 io_bus.read(port as u64, &mut data[..size]);
1289 if let Err(e) = vcpu.set_data(&data[..size]) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001290 error!("failed to set return data for IoIn: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001291 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001292 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001293 Ok(VcpuExit::IoOut {
1294 port,
1295 mut size,
1296 data,
1297 }) => {
1298 if size > data.len() {
1299 error!("unsupported IoOut size of {} bytes", size);
1300 size = data.len();
1301 }
1302 io_bus.write(port as u64, &data[..size]);
1303 }
1304 Ok(VcpuExit::MmioRead { address, size }) => {
1305 let mut data = [0; 8];
1306 mmio_bus.read(address, &mut data[..size]);
1307 // Setting data for mmio can not fail.
1308 let _ = vcpu.set_data(&data[..size]);
1309 }
1310 Ok(VcpuExit::MmioWrite {
1311 address,
1312 size,
1313 data,
1314 }) => {
1315 mmio_bus.write(address, &data[..size]);
1316 }
1317 Ok(VcpuExit::Hlt) => break,
1318 Ok(VcpuExit::Shutdown) => break,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001319 Ok(VcpuExit::SystemEvent(_, _)) => break,
David Tolnay8f3a2322018-11-30 17:11:35 -08001320 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1321 Err(e) => match e.errno() {
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001322 libc::EINTR => interrupted_by_signal = true,
1323 libc::EAGAIN => {}
David Tolnay8f3a2322018-11-30 17:11:35 -08001324 _ => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001325 error!("vcpu hit unknown error: {}", e);
David Tolnay8f3a2322018-11-30 17:11:35 -08001326 break;
1327 }
1328 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001329 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001330
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001331 if interrupted_by_signal {
Matt Delco84cf9c02019-10-07 22:38:13 -07001332 if use_kvm_signals {
1333 // Try to clear the signal that we use to kick VCPU if it is pending before
1334 // attempting to handle pause requests.
1335 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
1336 error!("failed to clear pending signal: {}", e);
1337 break;
1338 }
1339 } else {
1340 vcpu.set_immediate_exit(false);
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001341 }
1342 let mut run_mode_lock = run_mode_arc.mtx.lock();
1343 loop {
1344 match *run_mode_lock {
1345 VmRunMode::Running => break,
Zach Reizner795355a2019-01-16 17:37:57 -08001346 VmRunMode::Suspending => {
1347 // On KVM implementations that use a paravirtualized clock (e.g.
1348 // x86), a flag must be set to indicate to the guest kernel that
1349 // a VCPU was suspended. The guest kernel will use this flag to
1350 // prevent the soft lockup detection from triggering when this
1351 // VCPU resumes, which could happen days later in realtime.
1352 if requires_kvmclock_ctrl {
1353 if let Err(e) = vcpu.kvmclock_ctrl() {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001354 error!("failed to signal to kvm that vcpu {} is being suspended: {}", cpu_id, e);
Zach Reizner795355a2019-01-16 17:37:57 -08001355 }
1356 }
1357 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001358 VmRunMode::Exiting => break 'vcpu_loop,
1359 }
1360 // Give ownership of our exclusive lock to the condition variable that
1361 // will block. When the condition variable is notified, `wait` will
1362 // unblock and return a new exclusive lock.
1363 run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
1364 }
1365 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001366 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001367 }
Zach Reizner8fb52112017-12-13 16:04:39 -08001368 exit_evt
Zach Reizner39aa26b2017-12-12 18:03:23 -08001369 .write(1)
1370 .expect("failed to signal vcpu exit eventfd");
David Tolnay2bac1e72018-12-12 14:33:42 -08001371 })
1372 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001373}
1374
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of u64s.
//
// The entire file is read, so a field is never cut in half at a fixed buffer boundary (which
// would otherwise parse successfully as a different, smaller number).
//
// Returns an `io::ErrorKind::InvalidData` error if the contents are not valid UTF-8 or if any of
// the fields fail to parse as a `u64`. Errors from opening or reading the file are returned
// unchanged. A file that is empty or contains only whitespace yields an empty Vec.
fn file_fields_to_u64<P: AsRef<Path>>(path: P) -> io::Result<Vec<u64>> {
    let mut content = String::new();
    // `read_to_string` reports invalid UTF-8 as `InvalidData` on its own.
    File::open(path)?.read_to_string(&mut content)?;

    // `split_whitespace` already ignores leading and trailing whitespace, so no `trim` is needed.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<u64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1394
1395// Reads the contents of a file and converts them into a u64, and if there
1396// are multiple fields it only returns the first one.
1397fn file_to_u64<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1398 file_fields_to_u64(path)?
1399 .into_iter()
1400 .next()
1401 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001402}
1403
Dylan Reid059a1882018-07-23 17:58:09 -07001404pub fn run_config(cfg: Config) -> Result<()> {
Lepton Wu9105e9f2019-03-14 11:38:31 -07001405 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001406 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1407 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1408 // access to those files will not be possible.
1409 info!("crosvm entering multiprocess mode");
1410 }
1411
Jingkui Wang100e6e42019-03-08 20:41:57 -08001412 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001413 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001414 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1415 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1416 // quickly.
1417 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1418
David Tolnay2b089fc2019-03-04 15:33:22 -08001419 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1420 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001421 } else {
1422 None
1423 };
1424
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001425 let vm_image = match cfg.executable_path {
1426 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1427 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
1428 ),
1429 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1430 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
1431 ),
1432 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1433 };
1434
Dylan Reid059a1882018-07-23 17:58:09 -07001435 let components = VmComponents {
Jakub Staronf55f75d2019-04-26 11:22:51 -07001436 memory_size: (cfg.memory.unwrap_or(256) << 20) as u64,
Dylan Reid059a1882018-07-23 17:58:09 -07001437 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001438 vcpu_affinity: cfg.vcpu_affinity.clone(),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001439 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001440 android_fstab: cfg
1441 .android_fstab
1442 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001443 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001444 .map_or(Ok(None), |v| v.map(Some))?,
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001445 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001446 extra_kernel_params: cfg.params.clone(),
1447 wayland_dmabuf: cfg.wayland_dmabuf,
Dylan Reid059a1882018-07-23 17:58:09 -07001448 };
1449
Zach Reiznera60744b2019-02-13 17:33:32 -08001450 let control_server_socket = match &cfg.socket_path {
1451 Some(path) => Some(UnlinkUnixSeqpacketListener(
1452 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1453 )),
1454 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001455 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001456
1457 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001458 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07001459 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1460 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001461 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001462 let (balloon_host_socket, balloon_device_socket) =
Jakub Staron1f828d72019-04-11 12:49:29 -07001463 msg_socket::pair::<BalloonControlCommand, ()>().map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001464
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001465 // Create one control socket per disk.
1466 let mut disk_device_sockets = Vec::new();
1467 let mut disk_host_sockets = Vec::new();
1468 let disk_count = cfg.disks.len();
1469 for _ in 0..disk_count {
1470 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07001471 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
1472 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001473 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07001474 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001475 }
1476
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001477 let (gpu_host_socket, gpu_device_socket) =
1478 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1479 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
1480
Lepton Wu20333e42019-03-14 10:48:03 -07001481 let sandbox = cfg.sandbox;
Trent Begin17ccaad2019-04-17 13:51:25 -06001482 let linux = Arch::build_vm(
1483 components,
1484 cfg.split_irqchip,
1485 &cfg.serial_parameters,
Zach Reiznera8adff02019-08-13 11:20:14 -07001486 simple_jail(&cfg, "serial.policy")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001487 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06001488 create_devices(
1489 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001490 mem,
1491 vm,
1492 sys_allocator,
1493 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001494 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06001495 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001496 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06001497 balloon_device_socket,
1498 &mut disk_device_sockets,
1499 usb_provider,
1500 )
1501 },
1502 )
David Tolnaybe034262019-03-04 17:48:36 -08001503 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08001504
1505 let _render_node_host = ();
1506 #[cfg(feature = "gpu-forward")]
1507 let (_render_node_host, linux) = {
1508 // Rebinds linux as mutable.
1509 let mut linux = linux;
1510
1511 // Reserve memory range for GPU buffer allocation in advance to bypass region count
1512 // limitation. We use mremap/MAP_FIXED later to make sure GPU buffers fall into this range.
1513 let gpu_mmap =
1514 MemoryMapping::new_protection(RENDER_NODE_HOST_SIZE as usize, Protection::none())
1515 .map_err(Error::ReserveGpuMemory)?;
1516
Xiong Zhang383b3b52019-10-30 14:59:26 +08001517 // Put the non-accessible memory map into high mmio so that no other devices use that
Lepton Wu60893882018-11-21 11:06:18 -08001518 // guest address space.
1519 let gpu_addr = linux
1520 .resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001521 .mmio_allocator(MmioType::High)
Daniel Prilikd92f81a2019-03-26 14:28:19 -07001522 .allocate(
1523 RENDER_NODE_HOST_SIZE,
1524 Alloc::GpuRenderNode,
1525 "gpu_render_node".to_string(),
1526 )
1527 .map_err(|_| Error::AllocateGpuDeviceAddress)?;
Lepton Wu60893882018-11-21 11:06:18 -08001528
1529 let host = RenderNodeHost::start(&gpu_mmap, gpu_addr, linux.vm.get_memory().clone());
1530
1531 // Makes the gpu memory accessible at allocated address.
1532 linux
1533 .vm
Xiong Zhang383b3b52019-10-30 14:59:26 +08001534 .add_mmio_memory(
Lepton Wu60893882018-11-21 11:06:18 -08001535 GuestAddress(gpu_addr),
1536 gpu_mmap,
1537 /* read_only = */ false,
1538 /* log_dirty_pages = */ false,
1539 )
1540 .map_err(Error::AddGpuDeviceMemory)?;
1541 (host, linux)
1542 };
1543
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001544 run_control(
1545 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08001546 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001547 control_sockets,
1548 balloon_host_socket,
1549 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001550 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001551 sigchld_fd,
Lepton Wu60893882018-11-21 11:06:18 -08001552 _render_node_host,
Lepton Wu20333e42019-03-14 10:48:03 -07001553 sandbox,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001554 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001555}
1556
Zach Reizner55a9e502018-10-03 10:22:32 -07001557fn run_control(
1558 mut linux: RunnableLinuxVm,
Zach Reiznera60744b2019-02-13 17:33:32 -08001559 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07001560 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07001561 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001562 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08001563 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07001564 sigchld_fd: SignalFd,
Lepton Wu60893882018-11-21 11:06:18 -08001565 _render_node_host: RenderNodeHost,
Lepton Wu20333e42019-03-14 10:48:03 -07001566 sandbox: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001567) -> Result<()> {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001568 // Paths to get the currently available memory and the low memory threshold.
David Tolnay5bbbf612018-12-01 17:49:30 -08001569 const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
1570 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001571
1572 // The amount of additional memory to claim back from the VM whenever the system is
1573 // low on memory.
1574 const ONE_GB: u64 = (1 << 30);
1575
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001576 let max_balloon_memory = match linux.vm.get_memory().memory_size() {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001577 // If the VM has at least 1.5 GB, the balloon driver can consume all but the last 1 GB.
1578 n if n >= (ONE_GB / 2) * 3 => n - ONE_GB,
1579 // Otherwise, if the VM has at least 500MB the balloon driver will consume at most
1580 // half of it.
1581 n if n >= (ONE_GB / 2) => n / 2,
1582 // Otherwise, the VM is too small for us to take memory away from it.
1583 _ => 0,
1584 };
1585 let mut current_balloon_memory: u64 = 0;
1586 let balloon_memory_increment: u64 = max_balloon_memory / 16;
1587
Zach Reizner5bed0d22018-03-28 02:31:11 -07001588 #[derive(PollToken)]
1589 enum Token {
1590 Exit,
1591 Stdin,
1592 ChildSignal,
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001593 CheckAvailableMemory,
1594 LowMemory,
1595 LowmemTimer,
Zach Reiznera60744b2019-02-13 17:33:32 -08001596 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001597 VmControl { index: usize },
1598 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001599
1600 let stdin_handle = stdin();
1601 let stdin_lock = stdin_handle.lock();
1602 stdin_lock
1603 .set_raw_mode()
1604 .expect("failed to set terminal raw mode");
1605
Zach Reiznerb2110be2019-07-23 15:55:03 -07001606 let poll_ctx = PollContext::build_with(&[
1607 (&linux.exit_evt, Token::Exit),
1608 (&sigchld_fd, Token::ChildSignal),
1609 ])
1610 .map_err(Error::PollContextAdd)?;
1611
Zach Reizner5bed0d22018-03-28 02:31:11 -07001612 if let Err(e) = poll_ctx.add(&stdin_handle, Token::Stdin) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001613 warn!("failed to add stdin to poll context: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001614 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001615
1616 if let Some(socket_server) = &control_server_socket {
1617 poll_ctx
1618 .add(socket_server, Token::VmControlServer)
1619 .map_err(Error::PollContextAdd)?;
1620 }
Dylan Reid059a1882018-07-23 17:58:09 -07001621 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001622 poll_ctx
1623 .add(socket.as_ref(), Token::VmControl { index })
1624 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001625 }
1626
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001627 // Watch for low memory notifications and take memory back from the VM.
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001628 let low_mem = File::open("/dev/chromeos-low-mem").ok();
David Tolnay64cd5ea2019-04-15 15:56:35 -07001629 if let Some(low_mem) = &low_mem {
Zach Reizner55a9e502018-10-03 10:22:32 -07001630 poll_ctx
1631 .add(low_mem, Token::LowMemory)
1632 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001633 } else {
1634 warn!("Unable to open low mem indicator, maybe not a chrome os kernel");
1635 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001636
1637 // Used to rate limit balloon requests.
1638 let mut lowmem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001639 poll_ctx
1640 .add(&lowmem_timer, Token::LowmemTimer)
1641 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001642
1643 // Used to check whether it's ok to start giving memory back to the VM.
1644 let mut freemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001645 poll_ctx
1646 .add(&freemem_timer, Token::CheckAvailableMemory)
1647 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001648
1649 // Used to add jitter to timer values so that we don't have a thundering herd problem when
1650 // multiple VMs are running.
Daniel Prilik22006042019-01-14 14:19:04 -08001651 let mut simple_rng = SimpleRng::new(
1652 SystemTime::now()
1653 .duration_since(UNIX_EPOCH)
1654 .expect("time went backwards")
1655 .subsec_nanos() as u64,
1656 );
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001657
Lepton Wu20333e42019-03-14 10:48:03 -07001658 if sandbox {
1659 // Before starting VCPUs, in case we started with some capabilities, drop them all.
1660 drop_capabilities().map_err(Error::DropCapabilities)?;
1661 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001662
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001663 let mut vcpu_handles = Vec::with_capacity(linux.vcpus.len());
1664 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpus.len() + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001665 let run_mode_arc = Arc::new(VcpuRunMode::default());
Matt Delco84cf9c02019-10-07 22:38:13 -07001666 let use_kvm_signals = !linux.kvm.check_extension(Cap::ImmediateExit);
1667 setup_vcpu_signal_handler(use_kvm_signals)?;
Daniel Verkamp94c35272019-09-12 13:31:30 -07001668 let vcpus = linux.vcpus.split_off(0);
1669 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001670 let handle = run_vcpu(
1671 vcpu,
1672 cpu_id as u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001673 linux.vcpu_affinity.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001674 vcpu_thread_barrier.clone(),
1675 linux.io_bus.clone(),
1676 linux.mmio_bus.clone(),
1677 linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Zach Reizner795355a2019-01-16 17:37:57 -08001678 linux.vm.check_extension(Cap::KvmclockCtrl),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001679 run_mode_arc.clone(),
Matt Delco84cf9c02019-10-07 22:38:13 -07001680 use_kvm_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07001681 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07001682 vcpu_handles.push(handle);
1683 }
1684 vcpu_thread_barrier.wait();
1685
Zach Reizner39aa26b2017-12-12 18:03:23 -08001686 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001687 let events = {
1688 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001689 Ok(v) => v,
1690 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001691 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001692 break;
1693 }
1694 }
1695 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001696
1697 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07001698 for event in events.iter_readable() {
1699 match event.token() {
1700 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001701 info!("vcpu requested shutdown");
1702 break 'poll;
1703 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001704 Token::Stdin => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001705 let mut out = [0u8; 64];
1706 match stdin_lock.read_raw(&mut out[..]) {
1707 Ok(0) => {
1708 // Zero-length read indicates EOF. Remove from pollables.
Zach Reizner5bed0d22018-03-28 02:31:11 -07001709 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001710 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001711 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001712 warn!("error while reading stdin: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001713 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001714 }
Jakub Staronb6515a92019-06-05 15:18:25 -07001715 Ok(count) => {
1716 if let Some(ref stdio_serial) = linux.stdio_serial {
Trent Begin17ccaad2019-04-17 13:51:25 -06001717 stdio_serial
Trent Begin17ccaad2019-04-17 13:51:25 -06001718 .queue_input_bytes(&out[..count])
1719 .expect("failed to queue bytes into serial port");
1720 }
Jakub Staronb6515a92019-06-05 15:18:25 -07001721 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001722 }
1723 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001724 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001725 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08001726 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08001727 let pid = siginfo.ssi_pid;
1728 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1729 Some(label) => format!("{} (pid {})", label, pid),
1730 None => format!("pid {}", pid),
1731 };
David Tolnayf5032762018-12-03 10:46:45 -08001732 error!(
1733 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001734 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001735 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001736 }
David Tolnayf5032762018-12-03 10:46:45 -08001737 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001738 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001739 Token::CheckAvailableMemory => {
1740 // Acknowledge the timer.
1741 freemem_timer.wait().map_err(Error::TimerFd)?;
1742 if current_balloon_memory == 0 {
1743 // Nothing to see here.
1744 if let Err(e) = freemem_timer.clear() {
1745 warn!("unable to clear available memory check timer: {}", e);
1746 }
1747 continue;
1748 }
1749
1750 // Otherwise see if we can free up some memory.
1751 let margin = file_to_u64(LOWMEM_MARGIN).map_err(Error::ReadLowmemMargin)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001752 let available =
1753 file_to_u64(LOWMEM_AVAILABLE).map_err(Error::ReadLowmemAvailable)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001754
1755 // `available` and `margin` are specified in MB while `balloon_memory_increment` is in
1756 // bytes. So to correctly compare them we need to turn the increment value into MB.
Zach Reizner55a9e502018-10-03 10:22:32 -07001757 if available >= margin + 2 * (balloon_memory_increment >> 20) {
1758 current_balloon_memory =
1759 if current_balloon_memory >= balloon_memory_increment {
1760 current_balloon_memory - balloon_memory_increment
1761 } else {
1762 0
1763 };
Jakub Staron1f828d72019-04-11 12:49:29 -07001764 let command = BalloonControlCommand::Adjust {
1765 num_bytes: current_balloon_memory,
1766 };
1767 if let Err(e) = balloon_host_socket.send(&command) {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001768 warn!("failed to send memory value to balloon device: {}", e);
1769 }
1770 }
1771 }
1772 Token::LowMemory => {
David Tolnay64cd5ea2019-04-15 15:56:35 -07001773 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001774 let old_balloon_memory = current_balloon_memory;
Zach Reizner55a9e502018-10-03 10:22:32 -07001775 current_balloon_memory = min(
1776 current_balloon_memory + balloon_memory_increment,
1777 max_balloon_memory,
1778 );
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001779 if current_balloon_memory != old_balloon_memory {
Jakub Staron1f828d72019-04-11 12:49:29 -07001780 let command = BalloonControlCommand::Adjust {
1781 num_bytes: current_balloon_memory,
1782 };
1783 if let Err(e) = balloon_host_socket.send(&command) {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001784 warn!("failed to send memory value to balloon device: {}", e);
1785 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001786 }
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001787
1788 // Stop polling the lowmem device until the timer fires.
1789 poll_ctx.delete(low_mem).map_err(Error::PollContextDelete)?;
1790
1791 // Add some jitter to the timer so that if there are multiple VMs running
1792 // they don't all start ballooning at exactly the same time.
Daniel Prilik22006042019-01-14 14:19:04 -08001793 let lowmem_dur = Duration::from_millis(1000 + simple_rng.rng() % 200);
Zach Reizner55a9e502018-10-03 10:22:32 -07001794 lowmem_timer
1795 .reset(lowmem_dur, None)
1796 .map_err(Error::ResetTimerFd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001797
1798 // Also start a timer to check when we can start giving memory back. Do the
1799 // first check after a minute (with jitter) and subsequent checks after
1800 // every 30 seconds (with jitter).
Daniel Prilik22006042019-01-14 14:19:04 -08001801 let freemem_dur = Duration::from_secs(60 + simple_rng.rng() % 12);
1802 let freemem_int = Duration::from_secs(30 + simple_rng.rng() % 6);
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001803 freemem_timer
1804 .reset(freemem_dur, Some(freemem_int))
1805 .map_err(Error::ResetTimerFd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001806 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001807 }
1808 Token::LowmemTimer => {
1809 // Acknowledge the timer.
1810 lowmem_timer.wait().map_err(Error::TimerFd)?;
1811
David Tolnay64cd5ea2019-04-15 15:56:35 -07001812 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001813 // Start polling the lowmem device again.
Zach Reizner55a9e502018-10-03 10:22:32 -07001814 poll_ctx
1815 .add(low_mem, Token::LowMemory)
1816 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001817 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001818 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001819 Token::VmControlServer => {
1820 if let Some(socket_server) = &control_server_socket {
1821 match socket_server.accept() {
1822 Ok(socket) => {
1823 poll_ctx
1824 .add(
1825 &socket,
1826 Token::VmControl {
1827 index: control_sockets.len(),
1828 },
1829 )
1830 .map_err(Error::PollContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001831 control_sockets
1832 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001833 }
1834 Err(e) => error!("failed to accept socket: {}", e),
1835 }
1836 }
1837 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001838 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001839 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001840 match socket {
1841 TaggedControlSocket::Vm(socket) => match socket.recv() {
1842 Ok(request) => {
1843 let mut run_mode_opt = None;
1844 let response = request.execute(
1845 &mut run_mode_opt,
1846 &balloon_host_socket,
1847 disk_host_sockets,
1848 &usb_control_socket,
1849 );
1850 if let Err(e) = socket.send(&response) {
1851 error!("failed to send VmResponse: {}", e);
1852 }
1853 if let Some(run_mode) = run_mode_opt {
1854 info!("control socket changed run mode to {}", run_mode);
1855 match run_mode {
1856 VmRunMode::Exiting => {
1857 break 'poll;
1858 }
1859 other => {
1860 run_mode_arc.set_and_notify(other);
1861 for handle in &vcpu_handles {
1862 let _ = handle.kill(SIGRTMIN() + 0);
1863 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001864 }
1865 }
1866 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001867 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001868 Err(e) => {
1869 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1870 vm_control_indices_to_remove.push(index);
1871 } else {
1872 error!("failed to recv VmRequest: {}", e);
1873 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001874 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001875 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07001876 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001877 Ok(request) => {
1878 let response =
1879 request.execute(&mut linux.vm, &mut linux.resources);
1880 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07001881 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07001882 }
1883 }
1884 Err(e) => {
1885 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1886 vm_control_indices_to_remove.push(index);
1887 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07001888 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07001889 }
1890 }
1891 },
Xiong Zhang2515b752019-09-19 10:29:02 +08001892 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
1893 Ok(request) => {
1894 let response =
1895 request.execute(&mut linux.vm, &mut linux.resources);
1896 if let Err(e) = socket.send(&response) {
1897 error!("failed to send VmIrqResponse: {}", e);
1898 }
1899 }
1900 Err(e) => {
1901 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1902 vm_control_indices_to_remove.push(index);
1903 } else {
1904 error!("failed to recv VmIrqRequest: {}", e);
1905 }
1906 }
1907 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001908 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001909 }
1910 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001911 }
1912 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001913
Zach Reizner5bed0d22018-03-28 02:31:11 -07001914 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08001915 match event.token() {
1916 Token::Exit => {}
1917 Token::Stdin => {
1918 let _ = poll_ctx.delete(&stdin_handle);
1919 }
1920 Token::ChildSignal => {}
1921 Token::CheckAvailableMemory => {}
1922 Token::LowMemory => {}
1923 Token::LowmemTimer => {}
1924 Token::VmControlServer => {}
1925 Token::VmControl { index } => {
1926 // It's possible more data is readable and buffered while the socket is hungup,
1927 // so don't delete the socket from the poll context until we're sure all the
1928 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07001929 match control_sockets
1930 .get(index)
1931 .map(|s| s.as_ref().get_readable_bytes())
1932 {
Zach Reiznera60744b2019-02-13 17:33:32 -08001933 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
1934 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
1935 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07001936 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001937 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001938 }
1939 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001940
1941 // Sort in reverse so the highest indexes are removed first. This removal algorithm
Zide Chen89584072019-11-14 10:33:51 -08001942 // preserves correct indexes as each element is removed.
Zach Reiznera60744b2019-02-13 17:33:32 -08001943 vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
1944 vm_control_indices_to_remove.dedup();
1945 for index in vm_control_indices_to_remove {
Zide Chen89584072019-11-14 10:33:51 -08001946 // Delete the socket from the `poll_ctx` synchronously. Otherwise, the kernel will do
1947 // this automatically when the FD inserted into the `poll_ctx` is closed after this
1948 // if-block, but this removal can be deferred unpredictably. In some instances where the
1949 // system is under heavy load, we can even get events returned by `poll_ctx` for an FD
1950 // that has already been closed. Because the token associated with that spurious event
1951 // now belongs to a different socket, the control loop will start to interact with
1952 // sockets that might not be ready to use. This can cause incorrect hangup detection or
1953 // blocking on a socket that will never be ready. See also: crbug.com/1019986
1954 if let Some(socket) = control_sockets.get(index) {
1955 poll_ctx.delete(socket).map_err(Error::PollContextDelete)?;
1956 }
1957
1958 // This line implicitly drops the socket at `index` when it gets returned by
1959 // `swap_remove`. After this line, the socket at `index` is not the one from
1960 // `vm_control_indices_to_remove`. Because of this socket's change in index, we need to
1961 // use `poll_ctx.modify` to change the associated index in its `Token::VmControl`.
Zach Reiznera60744b2019-02-13 17:33:32 -08001962 control_sockets.swap_remove(index);
1963 if let Some(socket) = control_sockets.get(index) {
1964 poll_ctx
Xiong Zhang44bb3dd2019-04-23 17:09:50 +08001965 .modify(
1966 socket,
1967 WatchingEvents::empty().set_read(),
1968 Token::VmControl { index },
1969 )
Zach Reiznera60744b2019-02-13 17:33:32 -08001970 .map_err(Error::PollContextAdd)?;
1971 }
1972 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001973 }
1974
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001975 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
1976 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07001977 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08001978 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001979 Ok(_) => {
1980 if let Err(e) = handle.join() {
1981 error!("failed to join vcpu thread: {:?}", e);
1982 }
1983 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08001984 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08001985 }
1986 }
1987
Daniel Verkamp94c35272019-09-12 13:31:30 -07001988 // Explicitly drop the VM structure here to allow the devices to clean up before the
1989 // control sockets are closed when this function exits.
1990 mem::drop(linux);
1991
Zach Reizner39aa26b2017-12-12 18:03:23 -08001992 stdin_lock
1993 .set_canon_mode()
1994 .expect("failed to restore canonical mode for terminal");
1995
1996 Ok(())
1997}