blob: 803a82ea20ed0d6bbbcbb59d0e2afa8fbc202be8 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use std;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07006use std::cmp::min;
David Tolnayfdac5ed2019-03-08 16:56:14 -08007use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -08009use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070011use std::io::{self, stdin, Read};
David Tolnay2b089fc2019-03-04 15:33:22 -080012use std::net::Ipv4Addr;
Jakub Starond99cd0a2019-04-11 14:09:39 -070013use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080014use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080015use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070016use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070017use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080018use std::thread;
19use std::thread::JoinHandle;
Daniel Prilik22006042019-01-14 14:19:04 -080020use std::time::{Duration, SystemTime, UNIX_EPOCH};
Zach Reizner39aa26b2017-12-12 18:03:23 -080021
David Tolnay41a6f842019-03-01 16:18:44 -080022use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080023
Dylan Reid3082e8e2019-01-07 10:33:48 -080024use audio_streams::DummyStreamSource;
David Tolnay2b089fc2019-03-04 15:33:22 -080025use devices::virtio::{self, VirtioDevice};
Jingkui Wang100e6e42019-03-08 20:41:57 -080026use devices::{self, HostBackendDeviceProvider, PciDevice, VirtioPciDevice, XhciController};
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use io_jail::{self, Minijail};
Zach Reizner39aa26b2017-12-12 18:03:23 -080028use kvm::*;
paulhsiaf052cfe2019-01-22 15:22:25 +080029use libcras::CrasClient;
Zach Reiznera60744b2019-02-13 17:33:32 -080030use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080031use net_util::{Error as NetError, MacAddress, Tap};
Daniel Verkampf02fdd12018-10-10 17:25:14 -070032use qcow::{self, ImageType, QcowFile};
Daniel Prilik22006042019-01-14 14:19:04 -080033use rand_ish::SimpleRng;
David Tolnay3df35522019-03-11 12:36:30 -070034use remain::sorted;
Daniel Prilikd92f81a2019-03-26 14:28:19 -070035#[cfg(feature = "gpu-forward")]
36use resources::Alloc;
Zach Reizner6a8fdd92019-01-16 14:38:41 -080037use sync::{Condvar, Mutex};
Jakub Starond99cd0a2019-04-11 14:09:39 -070038use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Zach Reiznera60744b2019-02-13 17:33:32 -080039use sys_util::{
David Tolnay633426a2019-04-12 12:18:35 -070040 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
41 get_group_id, get_user_id, getegid, geteuid, info, register_signal_handler, set_cpu_affinity,
42 validate_raw_fd, warn, EventFd, FlockOperation, GuestMemory, Killable, PollContext, PollToken,
43 SignalFd, Terminal, TimerFd, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080044};
Lepton Wu60893882018-11-21 11:06:18 -080045#[cfg(feature = "gpu-forward")]
46use sys_util::{GuestAddress, MemoryMapping, Protection};
Jason D. Clinton865323d2017-09-27 22:04:03 -060047use vhost;
Jakub Starone7c59052019-04-09 12:31:14 -070048use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070049 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -070050 DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, DiskControlResult,
Jakub Starond99cd0a2019-04-11 14:09:39 -070051 UsbControlSocket, VmControlResponseSocket, VmRunMode, WlControlRequestSocket,
52 WlControlResponseSocket, WlDriverRequest, WlDriverResponse,
Jakub Starone7c59052019-04-09 12:31:14 -070053};
Zach Reizner39aa26b2017-12-12 18:03:23 -080054
Jorge E. Moreira99d3f082019-03-07 10:59:54 -080055use crate::{Config, DiskOption, TouchDeviceOption};
Zach Reizner39aa26b2017-12-12 18:03:23 -080056
Dylan Reid059a1882018-07-23 17:58:09 -070057use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents};
Sonny Raoed517d12018-02-13 22:09:43 -080058
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080059#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
60use aarch64::AArch64 as Arch;
Zach Reizner55a9e502018-10-03 10:22:32 -070061#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
62use x86_64::X8664arch as Arch;
Zach Reizner39aa26b2017-12-12 18:03:23 -080063
Lepton Wu60893882018-11-21 11:06:18 -080064#[cfg(feature = "gpu-forward")]
David Tolnayaecf9a42019-04-11 14:30:00 -070065use render_node_forward::*;
Lepton Wu60893882018-11-21 11:06:18 -080066#[cfg(not(feature = "gpu-forward"))]
67type RenderNodeHost = ();
68
David Tolnay3df35522019-03-11 12:36:30 -070069#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070070#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080071pub enum Error {
Lepton Wu60893882018-11-21 11:06:18 -080072 AddGpuDeviceMemory(sys_util::Error),
73 AllocateGpuDeviceAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -080074 BalloonDeviceNew(virtio::BalloonError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080075 BlockDeviceNew(sys_util::Error),
Mark Ryan6ed5aea2018-04-20 13:52:35 +010076 BlockSignal(sys_util::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080077 BuildVm(<Arch as LinuxArch>::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080078 ChownTpmStorage(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080079 CloneEventFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080080 CreateCrasClient(libcras::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080081 CreateEventFd(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070082 CreatePollContext(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080083 CreateSignalFd(sys_util::SignalFdError),
84 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -080085 CreateTapDevice(NetError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -070086 CreateTimerFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080087 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -080088 CreateUsbProvider(devices::usb::host_backend::error::Error),
Daniel Verkampf02fdd12018-10-10 17:25:14 -070089 DetectImageType(qcow::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -080090 DeviceJail(io_jail::Error),
91 DevicePivotRoot(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080092 Disk(io::Error),
Stephen Barberc79de2d2018-02-21 14:17:27 -080093 DiskImageLock(sys_util::Error),
Dmitry Torokhov71006072019-03-06 10:56:51 -080094 DropCapabilities(sys_util::Error),
Lepton Wu39133a02019-02-27 12:42:29 -080095 InputDeviceNew(virtio::InputError),
96 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -070097 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -070098 InvalidWaylandPath,
David Tolnayfd0971d2019-03-04 17:15:57 -080099 IoJail(io_jail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800100 LoadKernel(Box<dyn StdError>),
David Tolnay2b089fc2019-03-04 15:33:22 -0800101 NetDeviceNew(virtio::NetError),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800102 OpenAndroidFstab(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800103 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800104 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800105 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800106 P9DeviceNew(virtio::P9Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800107 PivotRootDoesntExist(&'static str),
Zach Reizner5bed0d22018-03-28 02:31:11 -0700108 PollContextAdd(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700109 PollContextDelete(sys_util::Error),
Dylan Reid88624f82018-01-11 09:20:16 -0800110 QcowDeviceCreate(qcow::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700111 ReadLowmemAvailable(io::Error),
112 ReadLowmemMargin(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700113 RegisterBalloon(arch::DeviceRegistrationError),
114 RegisterBlock(arch::DeviceRegistrationError),
115 RegisterGpu(arch::DeviceRegistrationError),
116 RegisterNet(arch::DeviceRegistrationError),
117 RegisterP9(arch::DeviceRegistrationError),
118 RegisterRng(arch::DeviceRegistrationError),
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100119 RegisterSignalHandler(sys_util::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700120 RegisterWayland(arch::DeviceRegistrationError),
Lepton Wu60893882018-11-21 11:06:18 -0800121 ReserveGpuMemory(sys_util::MmapError),
122 ReserveMemory(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700123 ResetTimerFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800124 RngDeviceNew(virtio::RngError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800125 SettingGidMap(io_jail::Error),
126 SettingUidMap(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800127 SignalFd(sys_util::SignalFdError),
128 SpawnVcpu(io::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700129 TimerFd(sys_util::Error),
Chirantan Ekbote2d292332018-11-16 11:35:24 -0800130 ValidateRawFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800131 VhostNetDeviceNew(virtio::vhost::Error),
132 VhostVsockDeviceNew(virtio::vhost::Error),
Daniel Verkamp56f283b2018-10-05 11:40:59 -0700133 VirtioPciDev(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800134 WaylandDeviceNew(sys_util::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800135}
136
David Tolnayc69f9752019-03-01 18:07:56 -0800137impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700138 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800139 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800140 use self::Error::*;
141
David Tolnay3df35522019-03-11 12:36:30 -0700142 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800143 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800144 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
145 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
David Tolnayc69f9752019-03-01 18:07:56 -0800146 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
147 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
148 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800149 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800150 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800151 CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800152 CreateCrasClient(e) => write!(f, "failed to create cras client: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800153 CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
154 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
155 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
156 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
157 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
158 CreateTimerFd(e) => write!(f, "failed to create timerfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800159 CreateTpmStorage(p, e) => {
160 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
161 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800162 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800163 DetectImageType(e) => write!(f, "failed to detect disk image type: {}", e),
164 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
165 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
166 Disk(e) => write!(f, "failed to load disk image: {}", e),
167 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800168 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700169 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
170 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800171 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
172 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800173 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800174 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800175 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800176 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800177 f,
178 "failed to open android fstab file {}: {}",
179 p.display(),
180 e
181 ),
David Tolnay3df35522019-03-11 12:36:30 -0700182 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
183 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800184 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800185 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800186 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
David Tolnayc69f9752019-03-01 18:07:56 -0800187 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
188 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
189 QcowDeviceCreate(e) => write!(f, "failed to read qcow formatted file {}", e),
190 ReadLowmemAvailable(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700191 f,
192 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
193 e
194 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800195 ReadLowmemMargin(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700196 f,
197 "failed to read /sys/kernel/mm/chromeos-low_mem/margin: {}",
198 e
199 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800200 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
201 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
202 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
203 RegisterNet(e) => write!(f, "error registering net device: {}", e),
204 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
205 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
206 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
207 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800208 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
209 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800210 ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
211 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800212 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
213 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
214 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
215 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
216 TimerFd(e) => write!(f, "failed to read timer fd: {}", e),
217 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
218 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
219 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
220 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
221 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800222 }
223 }
224}
225
David Tolnayfd0971d2019-03-04 17:15:57 -0800226impl From<io_jail::Error> for Error {
227 fn from(err: io_jail::Error) -> Self {
228 Error::IoJail(err)
229 }
230}
231
David Tolnayc69f9752019-03-01 18:07:56 -0800232impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700233
Zach Reizner39aa26b2017-12-12 18:03:23 -0800234type Result<T> = std::result::Result<T, Error>;
235
Jakub Starond99cd0a2019-04-11 14:09:39 -0700236enum TaggedControlSocket {
237 Vm(VmControlResponseSocket),
238 Wayland(WlControlResponseSocket),
239}
240
241impl AsRef<UnixSeqpacket> for TaggedControlSocket {
242 fn as_ref(&self) -> &UnixSeqpacket {
243 use self::TaggedControlSocket::*;
244 match &self {
245 Vm(ref socket) => socket,
246 Wayland(ref socket) => socket,
247 }
248 }
249}
250
251impl AsRawFd for TaggedControlSocket {
252 fn as_raw_fd(&self) -> RawFd {
253 self.as_ref().as_raw_fd()
254 }
255}
256
Zach Reizner39aa26b2017-12-12 18:03:23 -0800257fn create_base_minijail(root: &Path, seccomp_policy: &Path) -> Result<Minijail> {
258 // All child jails run in a new user namespace without any users mapped,
259 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800260 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800261 j.namespace_pids();
262 j.namespace_user();
263 j.namespace_user_disable_setgroups();
264 // Don't need any capabilities.
265 j.use_caps(0);
266 // Create a new mount namespace with an empty root FS.
267 j.namespace_vfs();
David Tolnay5bbbf612018-12-01 17:49:30 -0800268 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800269 // Run in an empty network namespace.
270 j.namespace_net();
271 // Apply the block device seccomp policy.
272 j.no_new_privs();
Stephen Barber3b1d8a52018-01-06 17:34:51 -0800273 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
274 // the entire device process if a worker thread commits a seccomp violation.
275 j.set_seccomp_filter_tsync();
Zach Reizner043ddc52018-04-03 20:47:21 -0700276 #[cfg(debug_assertions)]
277 j.log_seccomp_filter_failures();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800278 j.parse_seccomp_filters(seccomp_policy)
David Tolnay5bbbf612018-12-01 17:49:30 -0800279 .map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800280 j.use_seccomp_filter();
281 // Don't do init setup.
282 j.run_as_init();
283 Ok(j)
284}
285
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800286fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700287 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800288 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
289 // A directory for a jailed device's pivot root.
290 let root_path = Path::new(pivot_root);
291 if !root_path.exists() {
292 return Err(Error::PivotRootDoesntExist(pivot_root));
293 }
294 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
295 Ok(Some(create_base_minijail(root_path, &policy_path)?))
296 } else {
297 Ok(None)
298 }
299}
300
David Tolnayfd0971d2019-03-04 17:15:57 -0800301type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800302
303fn create_block_device(
304 cfg: &Config,
305 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700306 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800307) -> DeviceResult {
308 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
309 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
310 // Safe because we will validate |raw_fd|.
311 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
312 } else {
313 OpenOptions::new()
314 .read(true)
315 .write(!disk.read_only)
316 .open(&disk.path)
317 .map_err(Error::Disk)?
318 };
319 // Lock the disk image to prevent other crosvm instances from using it.
320 let lock_op = if disk.read_only {
321 FlockOperation::LockShared
322 } else {
323 FlockOperation::LockExclusive
324 };
325 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
326
327 let image_type = qcow::detect_image_type(&raw_image).map_err(Error::DetectImageType)?;
328 let dev = match image_type {
329 ImageType::Raw => {
330 // Access as a raw block device.
331 let dev = virtio::Block::new(raw_image, disk.read_only, Some(disk_device_socket))
332 .map_err(Error::BlockDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800333 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800334 }
335 ImageType::Qcow2 => {
336 // Valid qcow header present
337 let qcow_image = QcowFile::from(raw_image).map_err(Error::QcowDeviceCreate)?;
338 let dev = virtio::Block::new(qcow_image, disk.read_only, Some(disk_device_socket))
339 .map_err(Error::BlockDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800340 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800341 }
342 };
343
344 Ok(VirtioDeviceStub {
345 dev,
346 jail: simple_jail(&cfg, "block_device.policy")?,
347 })
348}
349
350fn create_rng_device(cfg: &Config) -> DeviceResult {
351 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
352
353 Ok(VirtioDeviceStub {
354 dev: Box::new(dev),
355 jail: simple_jail(&cfg, "rng_device.policy")?,
356 })
357}
358
/// Creates the virtio tpm device and chooses where its storage lives.
///
/// With a jail, a tmpfs is mounted at the jail's root, the crosvm user is
/// added to the jail, and a per-process directory `/run/vm/tpm.<pid>` is
/// created, chowned to that user and bind-mounted into the jail. Without a
/// jail, `/tmp/tpm-simulator` is used.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;
    use sys_util::chown;

    let mut tpm_jail = simple_jail(cfg, "tpm_device.policy")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;

            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    Ok(VirtioDeviceStub {
        dev: Box::new(virtio::Tpm::new(tpm_storage)),
        jail: tpm_jail,
    })
}
407
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800408fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
409 let socket = create_input_socket(&single_touch_spec.path).map_err(|e| {
410 error!("failed configuring virtio single touch: {:?}", e);
411 e
412 })?;
413
414 let dev = virtio::new_single_touch(socket, single_touch_spec.width, single_touch_spec.height)
415 .map_err(Error::InputDeviceNew)?;
416 Ok(VirtioDeviceStub {
417 dev: Box::new(dev),
418 jail: simple_jail(&cfg, "input_device.policy")?,
419 })
420}
421
422fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800423 let socket = create_input_socket(&trackpad_spec.path).map_err(|e| {
424 error!("failed configuring virtio trackpad: {}", e);
425 e
426 })?;
427
428 let dev = virtio::new_trackpad(socket, trackpad_spec.width, trackpad_spec.height)
429 .map_err(Error::InputDeviceNew)?;
430
431 Ok(VirtioDeviceStub {
432 dev: Box::new(dev),
433 jail: simple_jail(&cfg, "input_device.policy")?,
434 })
435}
436
437fn create_mouse_device(cfg: &Config, mouse_socket: &Path) -> DeviceResult {
438 let socket = create_input_socket(&mouse_socket).map_err(|e| {
439 error!("failed configuring virtio mouse: {}", e);
440 e
441 })?;
442
443 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
444
445 Ok(VirtioDeviceStub {
446 dev: Box::new(dev),
447 jail: simple_jail(&cfg, "input_device.policy")?,
448 })
449}
450
451fn create_keyboard_device(cfg: &Config, keyboard_socket: &Path) -> DeviceResult {
452 let socket = create_input_socket(&keyboard_socket).map_err(|e| {
453 error!("failed configuring virtio keyboard: {}", e);
454 e
455 })?;
456
457 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
458
459 Ok(VirtioDeviceStub {
460 dev: Box::new(dev),
461 jail: simple_jail(&cfg, "input_device.policy")?,
462 })
463}
464
465fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
466 let dev_file = OpenOptions::new()
467 .read(true)
468 .write(true)
469 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800470 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800471
472 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
473
474 Ok(VirtioDeviceStub {
475 dev: Box::new(dev),
476 jail: simple_jail(&cfg, "input_device.policy")?,
477 })
478}
479
Jakub Staron1f828d72019-04-11 12:49:29 -0700480fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800481 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
482
483 Ok(VirtioDeviceStub {
484 dev: Box::new(dev),
485 jail: simple_jail(&cfg, "balloon_device.policy")?,
486 })
487}
488
489fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
490 // Safe because we ensure that we get a unique handle to the fd.
491 let tap = unsafe {
492 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
493 .map_err(Error::CreateTapDevice)?
494 };
495
496 let dev = virtio::Net::from(tap).map_err(Error::NetDeviceNew)?;
497
498 Ok(VirtioDeviceStub {
499 dev: Box::new(dev),
500 jail: simple_jail(&cfg, "net_device.policy")?,
501 })
502}
503
504fn create_net_device(
505 cfg: &Config,
506 host_ip: Ipv4Addr,
507 netmask: Ipv4Addr,
508 mac_address: MacAddress,
509 mem: &GuestMemory,
510) -> DeviceResult {
511 let dev = if cfg.vhost_net {
512 let dev =
513 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
514 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800515 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800516 } else {
517 let dev =
518 virtio::Net::<Tap>::new(host_ip, netmask, mac_address).map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800519 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800520 };
521
522 let policy = if cfg.vhost_net {
523 "vhost_net_device.policy"
524 } else {
525 "net_device.policy"
526 };
527
528 Ok(VirtioDeviceStub {
529 dev,
530 jail: simple_jail(&cfg, policy)?,
531 })
532}
533
/// Creates the virtio gpu device.
///
/// The device gets a clone of `exit_evt`, `gpu_socket`, and the wayland
/// socket path it should use: `/wayland-0` when sandboxed (the socket is
/// bind-mounted there), `wayland_socket_path` otherwise. When sandboxed, the
/// jail gets a tmpfs root, bind mounts of the DRM device nodes and library
/// directories, and the crosvm user.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &EventFd,
    gpu_socket: virtio::resource_bridge::ResourceResponseSocket,
    wayland_socket_path: &Path,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");
    let device_wayland_path = if cfg.sandbox {
        jailed_wayland_path
    } else {
        wayland_socket_path
    };

    let dev = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEventFd)?,
        Some(gpu_socket),
        device_wayland_path,
    );

    let mut jail = simple_jail(cfg, "gpu_device.policy")?;
    if let Some(sandbox) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        sandbox.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;

        // The first three are device nodes required for DRM; the last two hold
        // libraries needed when mesa drivers are dynamically loaded. Each is
        // mounted at the same path inside the jail.
        let same_path_mounts = [
            "/sys/dev/char",
            "/sys/devices",
            "/dev/dri",
            "/lib64",
            "/usr/lib64",
        ];
        for dir in &same_path_mounts {
            let dir = Path::new(dir);
            sandbox.mount_bind(dir, dir, false)?;
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        sandbox.mount_bind(wayland_socket_path, jailed_wayland_path, true)?;

        add_crosvm_user_to_jail(sandbox, "gpu")?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail,
    })
}
595
596fn create_wayland_device(
597 cfg: &Config,
598 socket_path: &Path,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700599 socket: WlControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800600 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
601) -> DeviceResult {
602 let wayland_socket_dir = socket_path.parent().ok_or(Error::InvalidWaylandPath)?;
603 let wayland_socket_name = socket_path.file_name().ok_or(Error::InvalidWaylandPath)?;
604 let jailed_wayland_dir = Path::new("/wayland");
605 let jailed_wayland_path = jailed_wayland_dir.join(wayland_socket_name);
606
607 let dev = virtio::Wl::new(
Lepton Wu9105e9f2019-03-14 11:38:31 -0700608 if cfg.sandbox {
David Tolnay2b089fc2019-03-04 15:33:22 -0800609 &jailed_wayland_path
610 } else {
611 socket_path
612 },
613 socket,
614 resource_bridge,
615 )
616 .map_err(Error::WaylandDeviceNew)?;
617
618 let jail = match simple_jail(&cfg, "wl_device.policy")? {
619 Some(mut jail) => {
620 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
621 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
622 jail.mount_with_data(
623 Path::new("none"),
624 Path::new("/"),
625 "tmpfs",
626 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
627 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800628 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800629
630 // Bind mount the wayland socket's directory into jail's root. This is necessary since
631 // each new wayland context must open() the socket. If the wayland socket is ever
632 // destroyed and remade in the same host directory, new connections will be possible
633 // without restarting the wayland device.
David Tolnayfd0971d2019-03-04 17:15:57 -0800634 jail.mount_bind(wayland_socket_dir, jailed_wayland_dir, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800635
636 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
637
638 Some(jail)
639 }
640 None => None,
641 };
642
643 Ok(VirtioDeviceStub {
644 dev: Box::new(dev),
645 jail,
646 })
647}
648
649fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
650 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
651
652 Ok(VirtioDeviceStub {
653 dev: Box::new(dev),
654 jail: simple_jail(&cfg, "vhost_vsock_device.policy")?,
655 })
656}
657
658fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult {
659 let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? {
660 Some(mut jail) => {
661 // The shared directory becomes the root of the device's file system.
662 let root = Path::new("/");
David Tolnayfd0971d2019-03-04 17:15:57 -0800663 jail.mount_bind(src, root, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800664
665 // Set the uid/gid for the jailed process, and give a basic id map. This
666 // is required for the above bind mount to work.
667 jail.change_uid(chronos.uid);
668 jail.change_gid(chronos.gid);
669 jail.uidmap(&format!("{0} {0} 1", chronos.uid))
670 .map_err(Error::SettingUidMap)?;
671 jail.gidmap(&format!("{0} {0} 1", chronos.gid))
672 .map_err(Error::SettingGidMap)?;
673
674 (Some(jail), root)
675 }
676 None => {
677 // There's no bind mount so we tell the server to treat the source directory as the
David Tolnay9deb7d72019-03-05 18:25:44 -0800678 // root.
David Tolnay2b089fc2019-03-04 15:33:22 -0800679 (None, src)
680 }
681 };
682
683 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
684
685 Ok(VirtioDeviceStub {
686 dev: Box::new(dev),
687 jail,
688 })
689}
690
691fn create_virtio_devices(
692 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -0700693 mem: &GuestMemory,
694 _exit_evt: &EventFd,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700695 wayland_device_socket: WlControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -0700696 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -0700697 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800698) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700699 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800700
Zach Reizner8fb52112017-12-13 16:04:39 -0800701 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800702 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -0800703 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800704 }
705
David Tolnay2b089fc2019-03-04 15:33:22 -0800706 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800707
David Tolnayde6b29a2018-12-20 11:49:46 -0800708 #[cfg(feature = "tpm")]
709 {
David Tolnay43f8e212019-02-13 17:28:16 -0800710 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -0800711 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -0800712 }
David Tolnayde6b29a2018-12-20 11:49:46 -0800713 }
714
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800715 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
716 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
717 }
718
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800719 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -0800720 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800721 }
722
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800723 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -0800724 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800725 }
726
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800727 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -0800728 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800729 }
730
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800731 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -0800732 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800733 }
734
David Tolnay2b089fc2019-03-04 15:33:22 -0800735 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -0800736
Zach Reizner39aa26b2017-12-12 18:03:23 -0800737 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800738 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -0800739 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -0800740 }
741
David Tolnay2b089fc2019-03-04 15:33:22 -0800742 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
743 (cfg.host_ip, cfg.netmask, cfg.mac_address)
744 {
745 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800746 }
747
David Tolnayfa701712019-02-13 16:42:54 -0800748 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
David Tolnay2b089fc2019-03-04 15:33:22 -0800749 let mut resource_bridge_wl_socket = None::<virtio::resource_bridge::ResourceRequestSocket>;
David Tolnayfa701712019-02-13 16:42:54 -0800750
Zach Reizner3a8100a2017-09-13 19:15:43 -0700751 #[cfg(feature = "gpu")]
752 {
753 if cfg.gpu {
David Tolnay2b089fc2019-03-04 15:33:22 -0800754 if let Some(wayland_socket_path) = &cfg.wayland_socket_path {
Zach Reizneraa575662018-08-15 10:46:32 -0700755 let (wl_socket, gpu_socket) =
David Tolnay2b089fc2019-03-04 15:33:22 -0800756 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
Zach Reizneraa575662018-08-15 10:46:32 -0700757 resource_bridge_wl_socket = Some(wl_socket);
758
David Tolnay2b089fc2019-03-04 15:33:22 -0800759 devs.push(create_gpu_device(
760 cfg,
761 _exit_evt,
762 gpu_socket,
763 wayland_socket_path,
764 )?);
David Rileyb22b6132018-08-20 08:11:42 -0700765 }
Zach Reizner3a8100a2017-09-13 19:15:43 -0700766 }
767 }
768
Zach Reizneraa575662018-08-15 10:46:32 -0700769 if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() {
David Tolnay2b089fc2019-03-04 15:33:22 -0800770 devs.push(create_wayland_device(
771 cfg,
772 wayland_socket_path,
773 wayland_device_socket,
774 resource_bridge_wl_socket,
775 )?);
Zach Reizneraa575662018-08-15 10:46:32 -0700776 }
777
778 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -0800779 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700780 }
781
David Tolnayfd0971d2019-03-04 17:15:57 -0800782 let chronos = get_chronos_ids();
David Tolnay2b089fc2019-03-04 15:33:22 -0800783
784 for (src, tag) in &cfg.shared_dirs {
785 devs.push(create_9p_device(cfg, chronos, src, tag)?);
786 }
787
788 Ok(devs)
789}
790
791fn create_devices(
792 cfg: Config,
793 mem: &GuestMemory,
794 exit_evt: &EventFd,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700795 wayland_device_socket: WlControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -0700796 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -0700797 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -0800798 usb_provider: HostBackendDeviceProvider,
David Tolnayfdac5ed2019-03-08 16:56:14 -0800799) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800800 let stubs = create_virtio_devices(
801 &cfg,
802 mem,
803 exit_evt,
804 wayland_device_socket,
805 balloon_device_socket,
806 disk_device_sockets,
807 )?;
808
809 let mut pci_devices = Vec::new();
810
811 for stub in stubs {
812 let dev = VirtioPciDevice::new(mem.clone(), stub.dev).map_err(Error::VirtioPciDev)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800813 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800814 pci_devices.push((dev, stub.jail));
815 }
816
817 if cfg.cras_audio {
David Tolnayfd0971d2019-03-04 17:15:57 -0800818 let server = Box::new(CrasClient::new().map_err(Error::CreateCrasClient)?);
David Tolnay2b089fc2019-03-04 15:33:22 -0800819 let cras_audio = devices::Ac97Dev::new(mem.clone(), server);
820
821 pci_devices.push((
822 Box::new(cras_audio),
823 simple_jail(&cfg, "cras_audio_device.policy")?,
824 ));
825 }
826
827 if cfg.null_audio {
828 let server = Box::new(DummyStreamSource::new());
829 let null_audio = devices::Ac97Dev::new(mem.clone(), server);
830
831 pci_devices.push((
832 Box::new(null_audio),
833 simple_jail(&cfg, "null_audio_device.policy")?,
834 ));
835 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800836 // Create xhci controller.
837 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
838 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci.policy")?));
David Tolnay2b089fc2019-03-04 15:33:22 -0800839
840 Ok(pci_devices)
841}
842
843#[derive(Copy, Clone)]
844struct Ids {
845 uid: uid_t,
846 gid: gid_t,
847}
848
David Tolnayfd0971d2019-03-04 17:15:57 -0800849fn get_chronos_ids() -> Ids {
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700850 let chronos_user_group = CStr::from_bytes_with_nul(b"chronos\0").unwrap();
David Tolnay2b089fc2019-03-04 15:33:22 -0800851
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700852 let chronos_uid = match get_user_id(&chronos_user_group) {
853 Ok(u) => u,
854 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -0800855 warn!("falling back to current user id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700856 geteuid()
857 }
858 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800859
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700860 let chronos_gid = match get_group_id(&chronos_user_group) {
861 Ok(u) => u,
862 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -0800863 warn!("falling back to current group id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700864 getegid()
865 }
866 };
867
David Tolnayfd0971d2019-03-04 17:15:57 -0800868 Ids {
David Tolnay2b089fc2019-03-04 15:33:22 -0800869 uid: chronos_uid,
870 gid: chronos_gid,
David Tolnayfd0971d2019-03-04 17:15:57 -0800871 }
David Tolnay41a6f842019-03-01 16:18:44 -0800872}
873
David Tolnay48c48292019-03-01 16:54:25 -0800874// Set the uid/gid for the jailed process and give a basic id map. This is
875// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -0800876fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -0800877 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
878
879 let crosvm_uid = match get_user_id(&crosvm_user_group) {
880 Ok(u) => u,
881 Err(e) => {
882 warn!("falling back to current user id for {}: {}", feature, e);
883 geteuid()
884 }
885 };
886
887 let crosvm_gid = match get_group_id(&crosvm_user_group) {
888 Ok(u) => u,
889 Err(e) => {
890 warn!("falling back to current group id for {}: {}", feature, e);
891 getegid()
892 }
893 };
894
895 jail.change_uid(crosvm_uid);
896 jail.change_gid(crosvm_gid);
897 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
898 .map_err(Error::SettingUidMap)?;
899 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
900 .map_err(Error::SettingGidMap)?;
901
David Tolnay41a6f842019-03-01 16:18:44 -0800902 Ok(Ids {
903 uid: crosvm_uid,
904 gid: crosvm_gid,
905 })
David Tolnay48c48292019-03-01 16:54:25 -0800906}
907
David Tolnayfd0971d2019-03-04 17:15:57 -0800908fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800909 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -0800910 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800911 }
912 let raw_fd = path
913 .file_name()
914 .and_then(|fd_osstr| fd_osstr.to_str())
915 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
916 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -0800917 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800918}
919
David Tolnayfd0971d2019-03-04 17:15:57 -0800920fn create_input_socket(path: &Path) -> Result<UnixStream> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800921 if path.parent() == Some(Path::new("/proc/self/fd")) {
922 // Safe because we will validate |raw_fd|.
923 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(path)?)) }
924 } else {
David Tolnayfd0971d2019-03-04 17:15:57 -0800925 UnixStream::connect(path).map_err(Error::InputEventsOpen)
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800926 }
927}
928
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100929fn setup_vcpu_signal_handler() -> Result<()> {
930 unsafe {
931 extern "C" fn handle_signal() {}
932 // Our signal handler does nothing and is trivially async signal safe.
933 register_signal_handler(SIGRTMIN() + 0, handle_signal)
934 .map_err(Error::RegisterSignalHandler)?;
935 }
936 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
937 Ok(())
938}
939
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800940#[derive(Default)]
941struct VcpuRunMode {
942 mtx: Mutex<VmRunMode>,
943 cvar: Condvar,
944}
945
946impl VcpuRunMode {
947 fn set_and_notify(&self, new_mode: VmRunMode) {
948 *self.mtx.lock() = new_mode;
949 self.cvar.notify_all();
950 }
951}
952
Zach Reizner55a9e502018-10-03 10:22:32 -0700953fn run_vcpu(
954 vcpu: Vcpu,
955 cpu_id: u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -0700956 vcpu_affinity: Vec<usize>,
Zach Reizner55a9e502018-10-03 10:22:32 -0700957 start_barrier: Arc<Barrier>,
958 io_bus: devices::Bus,
959 mmio_bus: devices::Bus,
960 exit_evt: EventFd,
Zach Reizner795355a2019-01-16 17:37:57 -0800961 requires_kvmclock_ctrl: bool,
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800962 run_mode_arc: Arc<VcpuRunMode>,
Zach Reizner55a9e502018-10-03 10:22:32 -0700963) -> Result<JoinHandle<()>> {
Zach Reizner8fb52112017-12-13 16:04:39 -0800964 thread::Builder::new()
965 .name(format!("crosvm_vcpu{}", cpu_id))
966 .spawn(move || {
Daniel Verkamp107edb32019-04-05 09:58:48 -0700967 if vcpu_affinity.len() != 0 {
968 if let Err(e) = set_cpu_affinity(vcpu_affinity) {
969 error!("Failed to set CPU affinity: {}", e);
970 }
971 }
972
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100973 let mut sig_ok = true;
974 match get_blocked_signals() {
975 Ok(mut v) => {
976 v.retain(|&x| x != SIGRTMIN() + 0);
977 if let Err(e) = vcpu.set_signal_mask(&v) {
978 error!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800979 "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100980 cpu_id, e
981 );
982 sig_ok = false;
983 }
984 }
985 Err(e) => {
986 error!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800987 "Failed to retrieve signal mask for vcpu {} : {}",
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100988 cpu_id, e
989 );
990 sig_ok = false;
991 }
992 };
Zach Reizner39aa26b2017-12-12 18:03:23 -0800993
Zach Reizner8fb52112017-12-13 16:04:39 -0800994 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100995
David Tolnay8f3a2322018-11-30 17:11:35 -0800996 if sig_ok {
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800997 'vcpu_loop: loop {
998 let mut interrupted_by_signal = false;
David Tolnay8f3a2322018-11-30 17:11:35 -0800999 match vcpu.run() {
1000 Ok(VcpuExit::IoIn { port, mut size }) => {
1001 let mut data = [0; 8];
1002 if size > data.len() {
1003 error!("unsupported IoIn size of {} bytes", size);
1004 size = data.len();
Zach Reizner39aa26b2017-12-12 18:03:23 -08001005 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001006 io_bus.read(port as u64, &mut data[..size]);
1007 if let Err(e) = vcpu.set_data(&data[..size]) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001008 error!("failed to set return data for IoIn: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001009 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001010 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001011 Ok(VcpuExit::IoOut {
1012 port,
1013 mut size,
1014 data,
1015 }) => {
1016 if size > data.len() {
1017 error!("unsupported IoOut size of {} bytes", size);
1018 size = data.len();
1019 }
1020 io_bus.write(port as u64, &data[..size]);
1021 }
1022 Ok(VcpuExit::MmioRead { address, size }) => {
1023 let mut data = [0; 8];
1024 mmio_bus.read(address, &mut data[..size]);
1025 // Setting data for mmio can not fail.
1026 let _ = vcpu.set_data(&data[..size]);
1027 }
1028 Ok(VcpuExit::MmioWrite {
1029 address,
1030 size,
1031 data,
1032 }) => {
1033 mmio_bus.write(address, &data[..size]);
1034 }
1035 Ok(VcpuExit::Hlt) => break,
1036 Ok(VcpuExit::Shutdown) => break,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001037 Ok(VcpuExit::SystemEvent(_, _)) => break,
David Tolnay8f3a2322018-11-30 17:11:35 -08001038 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
1039 Err(e) => match e.errno() {
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001040 libc::EINTR => interrupted_by_signal = true,
1041 libc::EAGAIN => {}
David Tolnay8f3a2322018-11-30 17:11:35 -08001042 _ => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001043 error!("vcpu hit unknown error: {}", e);
David Tolnay8f3a2322018-11-30 17:11:35 -08001044 break;
1045 }
1046 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001047 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001048
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001049 if interrupted_by_signal {
1050 // Try to clear the signal that we use to kick VCPU if it is pending before
1051 // attempting to handle pause requests.
1052 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001053 error!("failed to clear pending signal: {}", e);
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001054 break;
1055 }
1056 let mut run_mode_lock = run_mode_arc.mtx.lock();
1057 loop {
1058 match *run_mode_lock {
1059 VmRunMode::Running => break,
Zach Reizner795355a2019-01-16 17:37:57 -08001060 VmRunMode::Suspending => {
1061 // On KVM implementations that use a paravirtualized clock (e.g.
1062 // x86), a flag must be set to indicate to the guest kernel that
1063 // a VCPU was suspended. The guest kernel will use this flag to
1064 // prevent the soft lockup detection from triggering when this
1065 // VCPU resumes, which could happen days later in realtime.
1066 if requires_kvmclock_ctrl {
1067 if let Err(e) = vcpu.kvmclock_ctrl() {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001068 error!("failed to signal to kvm that vcpu {} is being suspended: {}", cpu_id, e);
Zach Reizner795355a2019-01-16 17:37:57 -08001069 }
1070 }
1071 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001072 VmRunMode::Exiting => break 'vcpu_loop,
1073 }
1074 // Give ownership of our exclusive lock to the condition variable that
1075 // will block. When the condition variable is notified, `wait` will
1076 // unblock and return a new exclusive lock.
1077 run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
1078 }
1079 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001080 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001081 }
Zach Reizner8fb52112017-12-13 16:04:39 -08001082 exit_evt
Zach Reizner39aa26b2017-12-12 18:03:23 -08001083 .write(1)
1084 .expect("failed to signal vcpu exit eventfd");
David Tolnay2bac1e72018-12-12 14:33:42 -08001085 })
1086 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001087}
1088
// Reads the entire contents of a file and converts the whitespace-separated fields into a Vec of
// u64s. An empty (or whitespace-only) file yields an empty Vec.
//
// Returns an `InvalidData` error if the contents are not valid UTF-8 or if any of the fields
// fail to parse; errors from opening or reading the file are returned unchanged.
fn file_fields_to_u64<P: AsRef<Path>>(path: P) -> io::Result<Vec<u64>> {
    let mut file = File::open(path)?;

    // Read until EOF instead of issuing a single read into a small fixed-size buffer: a
    // truncated read could cut a number in half, which would still parse but to a wrong value.
    let mut buf = Vec::new();
    file.read_to_end(&mut buf)?;

    let content =
        str::from_utf8(&buf).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

    // split_whitespace already ignores leading and trailing whitespace.
    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<u64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1108
1109// Reads the contents of a file and converts them into a u64, and if there
1110// are multiple fields it only returns the first one.
1111fn file_to_u64<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1112 file_fields_to_u64(path)?
1113 .into_iter()
1114 .next()
1115 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001116}
1117
Dylan Reid059a1882018-07-23 17:58:09 -07001118pub fn run_config(cfg: Config) -> Result<()> {
Lepton Wu9105e9f2019-03-14 11:38:31 -07001119 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001120 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1121 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1122 // access to those files will not be possible.
1123 info!("crosvm entering multiprocess mode");
1124 }
1125
Jingkui Wang100e6e42019-03-08 20:41:57 -08001126 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001127 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001128 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1129 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1130 // quickly.
1131 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1132
David Tolnay2b089fc2019-03-04 15:33:22 -08001133 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1134 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001135 } else {
1136 None
1137 };
1138
Dylan Reid059a1882018-07-23 17:58:09 -07001139 let components = VmComponents {
Jakub Staronf55f75d2019-04-26 11:22:51 -07001140 memory_size: (cfg.memory.unwrap_or(256) << 20) as u64,
Dylan Reid059a1882018-07-23 17:58:09 -07001141 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001142 vcpu_affinity: cfg.vcpu_affinity.clone(),
David Tolnay2b089fc2019-03-04 15:33:22 -08001143 kernel_image: File::open(&cfg.kernel_path)
Dylan Reid059a1882018-07-23 17:58:09 -07001144 .map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001145 android_fstab: cfg
1146 .android_fstab
1147 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001148 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001149 .map_or(Ok(None), |v| v.map(Some))?,
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001150 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001151 extra_kernel_params: cfg.params.clone(),
1152 wayland_dmabuf: cfg.wayland_dmabuf,
Dylan Reid059a1882018-07-23 17:58:09 -07001153 };
1154
Zach Reiznera60744b2019-02-13 17:33:32 -08001155 let control_server_socket = match &cfg.socket_path {
1156 Some(path) => Some(UnlinkUnixSeqpacketListener(
1157 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1158 )),
1159 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001160 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001161
1162 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001163 let (wayland_host_socket, wayland_device_socket) =
Jakub Starond99cd0a2019-04-11 14:09:39 -07001164 msg_socket::pair::<WlDriverResponse, WlDriverRequest>().map_err(Error::CreateSocket)?;
1165 control_sockets.push(TaggedControlSocket::Wayland(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001166 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001167 let (balloon_host_socket, balloon_device_socket) =
Jakub Staron1f828d72019-04-11 12:49:29 -07001168 msg_socket::pair::<BalloonControlCommand, ()>().map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001169
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001170 // Create one control socket per disk.
1171 let mut disk_device_sockets = Vec::new();
1172 let mut disk_host_sockets = Vec::new();
1173 let disk_count = cfg.disks.len();
1174 for _ in 0..disk_count {
1175 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07001176 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
1177 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001178 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07001179 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001180 }
1181
Lepton Wu20333e42019-03-14 10:48:03 -07001182 let sandbox = cfg.sandbox;
Miriam Zimmerman26ac9282019-01-29 21:21:48 -08001183 let linux = Arch::build_vm(components, cfg.split_irqchip, |m, e| {
Jianxun Zhang96f2d8e2019-02-20 13:50:42 -08001184 create_devices(
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001185 cfg,
1186 m,
1187 e,
1188 wayland_device_socket,
1189 balloon_device_socket,
1190 &mut disk_device_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001191 usb_provider,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001192 )
David Tolnay2bac1e72018-12-12 14:33:42 -08001193 })
David Tolnaybe034262019-03-04 17:48:36 -08001194 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08001195
1196 let _render_node_host = ();
1197 #[cfg(feature = "gpu-forward")]
1198 let (_render_node_host, linux) = {
1199 // Rebinds linux as mutable.
1200 let mut linux = linux;
1201
1202 // Reserve memory range for GPU buffer allocation in advance to bypass region count
1203 // limitation. We use mremap/MAP_FIXED later to make sure GPU buffers fall into this range.
1204 let gpu_mmap =
1205 MemoryMapping::new_protection(RENDER_NODE_HOST_SIZE as usize, Protection::none())
1206 .map_err(Error::ReserveGpuMemory)?;
1207
1208 // Put the non-accessible memory map into device memory so that no other devices use that
1209 // guest address space.
1210 let gpu_addr = linux
1211 .resources
Daniel Prilikd92f81a2019-03-26 14:28:19 -07001212 .device_allocator()
1213 .allocate(
1214 RENDER_NODE_HOST_SIZE,
1215 Alloc::GpuRenderNode,
1216 "gpu_render_node".to_string(),
1217 )
1218 .map_err(|_| Error::AllocateGpuDeviceAddress)?;
Lepton Wu60893882018-11-21 11:06:18 -08001219
1220 let host = RenderNodeHost::start(&gpu_mmap, gpu_addr, linux.vm.get_memory().clone());
1221
1222 // Makes the gpu memory accessible at allocated address.
1223 linux
1224 .vm
1225 .add_device_memory(
1226 GuestAddress(gpu_addr),
1227 gpu_mmap,
1228 /* read_only = */ false,
1229 /* log_dirty_pages = */ false,
1230 )
1231 .map_err(Error::AddGpuDeviceMemory)?;
1232 (host, linux)
1233 };
1234
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001235 run_control(
1236 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08001237 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001238 control_sockets,
1239 balloon_host_socket,
1240 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001241 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001242 sigchld_fd,
Lepton Wu60893882018-11-21 11:06:18 -08001243 _render_node_host,
Lepton Wu20333e42019-03-14 10:48:03 -07001244 sandbox,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001245 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001246}
1247
Zach Reizner55a9e502018-10-03 10:22:32 -07001248fn run_control(
1249 mut linux: RunnableLinuxVm,
Zach Reiznera60744b2019-02-13 17:33:32 -08001250 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07001251 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07001252 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001253 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08001254 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07001255 sigchld_fd: SignalFd,
Lepton Wu60893882018-11-21 11:06:18 -08001256 _render_node_host: RenderNodeHost,
Lepton Wu20333e42019-03-14 10:48:03 -07001257 sandbox: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001258) -> Result<()> {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001259 // Paths to get the currently available memory and the low memory threshold.
David Tolnay5bbbf612018-12-01 17:49:30 -08001260 const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
1261 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001262
1263 // The amount of additional memory to claim back from the VM whenever the system is
1264 // low on memory.
1265 const ONE_GB: u64 = (1 << 30);
1266
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001267 let max_balloon_memory = match linux.vm.get_memory().memory_size() {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001268 // If the VM has at least 1.5 GB, the balloon driver can consume all but the last 1 GB.
1269 n if n >= (ONE_GB / 2) * 3 => n - ONE_GB,
1270 // Otherwise, if the VM has at least 500MB the balloon driver will consume at most
1271 // half of it.
1272 n if n >= (ONE_GB / 2) => n / 2,
1273 // Otherwise, the VM is too small for us to take memory away from it.
1274 _ => 0,
1275 };
1276 let mut current_balloon_memory: u64 = 0;
1277 let balloon_memory_increment: u64 = max_balloon_memory / 16;
1278
Zach Reizner5bed0d22018-03-28 02:31:11 -07001279 #[derive(PollToken)]
1280 enum Token {
1281 Exit,
1282 Stdin,
1283 ChildSignal,
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001284 CheckAvailableMemory,
1285 LowMemory,
1286 LowmemTimer,
Zach Reiznera60744b2019-02-13 17:33:32 -08001287 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001288 VmControl { index: usize },
1289 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001290
1291 let stdin_handle = stdin();
1292 let stdin_lock = stdin_handle.lock();
1293 stdin_lock
1294 .set_raw_mode()
1295 .expect("failed to set terminal raw mode");
1296
Zach Reizner5bed0d22018-03-28 02:31:11 -07001297 let poll_ctx = PollContext::new().map_err(Error::CreatePollContext)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001298 poll_ctx
1299 .add(&linux.exit_evt, Token::Exit)
1300 .map_err(Error::PollContextAdd)?;
Zach Reizner5bed0d22018-03-28 02:31:11 -07001301 if let Err(e) = poll_ctx.add(&stdin_handle, Token::Stdin) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001302 warn!("failed to add stdin to poll context: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001303 }
Zach Reizner55a9e502018-10-03 10:22:32 -07001304 poll_ctx
1305 .add(&sigchld_fd, Token::ChildSignal)
1306 .map_err(Error::PollContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001307
1308 if let Some(socket_server) = &control_server_socket {
1309 poll_ctx
1310 .add(socket_server, Token::VmControlServer)
1311 .map_err(Error::PollContextAdd)?;
1312 }
Dylan Reid059a1882018-07-23 17:58:09 -07001313 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001314 poll_ctx
1315 .add(socket.as_ref(), Token::VmControl { index })
1316 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001317 }
1318
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001319 // Watch for low memory notifications and take memory back from the VM.
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001320 let low_mem = File::open("/dev/chromeos-low-mem").ok();
David Tolnay64cd5ea2019-04-15 15:56:35 -07001321 if let Some(low_mem) = &low_mem {
Zach Reizner55a9e502018-10-03 10:22:32 -07001322 poll_ctx
1323 .add(low_mem, Token::LowMemory)
1324 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001325 } else {
1326 warn!("Unable to open low mem indicator, maybe not a chrome os kernel");
1327 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001328
1329 // Used to rate limit balloon requests.
1330 let mut lowmem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001331 poll_ctx
1332 .add(&lowmem_timer, Token::LowmemTimer)
1333 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001334
1335 // Used to check whether it's ok to start giving memory back to the VM.
1336 let mut freemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001337 poll_ctx
1338 .add(&freemem_timer, Token::CheckAvailableMemory)
1339 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001340
1341 // Used to add jitter to timer values so that we don't have a thundering herd problem when
1342 // multiple VMs are running.
Daniel Prilik22006042019-01-14 14:19:04 -08001343 let mut simple_rng = SimpleRng::new(
1344 SystemTime::now()
1345 .duration_since(UNIX_EPOCH)
1346 .expect("time went backwards")
1347 .subsec_nanos() as u64,
1348 );
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001349
Lepton Wu20333e42019-03-14 10:48:03 -07001350 if sandbox {
1351 // Before starting VCPUs, in case we started with some capabilities, drop them all.
1352 drop_capabilities().map_err(Error::DropCapabilities)?;
1353 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001354
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001355 let mut vcpu_handles = Vec::with_capacity(linux.vcpus.len());
1356 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpus.len() + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001357 let run_mode_arc = Arc::new(VcpuRunMode::default());
Dylan Reid059a1882018-07-23 17:58:09 -07001358 setup_vcpu_signal_handler()?;
1359 for (cpu_id, vcpu) in linux.vcpus.into_iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001360 let handle = run_vcpu(
1361 vcpu,
1362 cpu_id as u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001363 linux.vcpu_affinity.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001364 vcpu_thread_barrier.clone(),
1365 linux.io_bus.clone(),
1366 linux.mmio_bus.clone(),
1367 linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Zach Reizner795355a2019-01-16 17:37:57 -08001368 linux.vm.check_extension(Cap::KvmclockCtrl),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001369 run_mode_arc.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001370 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07001371 vcpu_handles.push(handle);
1372 }
1373 vcpu_thread_barrier.wait();
1374
Zach Reizner39aa26b2017-12-12 18:03:23 -08001375 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001376 let events = {
1377 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001378 Ok(v) => v,
1379 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001380 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001381 break;
1382 }
1383 }
1384 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001385
1386 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07001387 for event in events.iter_readable() {
1388 match event.token() {
1389 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001390 info!("vcpu requested shutdown");
1391 break 'poll;
1392 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001393 Token::Stdin => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001394 let mut out = [0u8; 64];
1395 match stdin_lock.read_raw(&mut out[..]) {
1396 Ok(0) => {
1397 // Zero-length read indicates EOF. Remove from pollables.
Zach Reizner5bed0d22018-03-28 02:31:11 -07001398 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001399 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001400 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001401 warn!("error while reading stdin: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001402 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001403 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001404 Ok(count) => {
Zach Reizner55a9e502018-10-03 10:22:32 -07001405 linux
1406 .stdio_serial
Zach Reizner39aa26b2017-12-12 18:03:23 -08001407 .lock()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001408 .queue_input_bytes(&out[..count])
1409 .expect("failed to queue bytes into serial port");
Zach Reizner55a9e502018-10-03 10:22:32 -07001410 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001411 }
1412 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001413 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001414 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08001415 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08001416 let pid = siginfo.ssi_pid;
1417 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1418 Some(label) => format!("{} (pid {})", label, pid),
1419 None => format!("pid {}", pid),
1420 };
David Tolnayf5032762018-12-03 10:46:45 -08001421 error!(
1422 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001423 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001424 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001425 }
David Tolnayf5032762018-12-03 10:46:45 -08001426 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001427 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001428 Token::CheckAvailableMemory => {
1429 // Acknowledge the timer.
1430 freemem_timer.wait().map_err(Error::TimerFd)?;
1431 if current_balloon_memory == 0 {
1432 // Nothing to see here.
1433 if let Err(e) = freemem_timer.clear() {
1434 warn!("unable to clear available memory check timer: {}", e);
1435 }
1436 continue;
1437 }
1438
1439 // Otherwise see if we can free up some memory.
1440 let margin = file_to_u64(LOWMEM_MARGIN).map_err(Error::ReadLowmemMargin)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001441 let available =
1442 file_to_u64(LOWMEM_AVAILABLE).map_err(Error::ReadLowmemAvailable)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001443
1444 // `available` and `margin` are specified in MB while `balloon_memory_increment` is in
1445 // bytes. So to correctly compare them we need to turn the increment value into MB.
Zach Reizner55a9e502018-10-03 10:22:32 -07001446 if available >= margin + 2 * (balloon_memory_increment >> 20) {
1447 current_balloon_memory =
1448 if current_balloon_memory >= balloon_memory_increment {
1449 current_balloon_memory - balloon_memory_increment
1450 } else {
1451 0
1452 };
Jakub Staron1f828d72019-04-11 12:49:29 -07001453 let command = BalloonControlCommand::Adjust {
1454 num_bytes: current_balloon_memory,
1455 };
1456 if let Err(e) = balloon_host_socket.send(&command) {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001457 warn!("failed to send memory value to balloon device: {}", e);
1458 }
1459 }
1460 }
1461 Token::LowMemory => {
David Tolnay64cd5ea2019-04-15 15:56:35 -07001462 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001463 let old_balloon_memory = current_balloon_memory;
Zach Reizner55a9e502018-10-03 10:22:32 -07001464 current_balloon_memory = min(
1465 current_balloon_memory + balloon_memory_increment,
1466 max_balloon_memory,
1467 );
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001468 if current_balloon_memory != old_balloon_memory {
Jakub Staron1f828d72019-04-11 12:49:29 -07001469 let command = BalloonControlCommand::Adjust {
1470 num_bytes: current_balloon_memory,
1471 };
1472 if let Err(e) = balloon_host_socket.send(&command) {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001473 warn!("failed to send memory value to balloon device: {}", e);
1474 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001475 }
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001476
1477 // Stop polling the lowmem device until the timer fires.
1478 poll_ctx.delete(low_mem).map_err(Error::PollContextDelete)?;
1479
1480 // Add some jitter to the timer so that if there are multiple VMs running
1481 // they don't all start ballooning at exactly the same time.
Daniel Prilik22006042019-01-14 14:19:04 -08001482 let lowmem_dur = Duration::from_millis(1000 + simple_rng.rng() % 200);
Zach Reizner55a9e502018-10-03 10:22:32 -07001483 lowmem_timer
1484 .reset(lowmem_dur, None)
1485 .map_err(Error::ResetTimerFd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001486
1487 // Also start a timer to check when we can start giving memory back. Do the
1488 // first check after a minute (with jitter) and subsequent checks after
1489 // every 30 seconds (with jitter).
Daniel Prilik22006042019-01-14 14:19:04 -08001490 let freemem_dur = Duration::from_secs(60 + simple_rng.rng() % 12);
1491 let freemem_int = Duration::from_secs(30 + simple_rng.rng() % 6);
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001492 freemem_timer
1493 .reset(freemem_dur, Some(freemem_int))
1494 .map_err(Error::ResetTimerFd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001495 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001496 }
1497 Token::LowmemTimer => {
1498 // Acknowledge the timer.
1499 lowmem_timer.wait().map_err(Error::TimerFd)?;
1500
David Tolnay64cd5ea2019-04-15 15:56:35 -07001501 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001502 // Start polling the lowmem device again.
Zach Reizner55a9e502018-10-03 10:22:32 -07001503 poll_ctx
1504 .add(low_mem, Token::LowMemory)
1505 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001506 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001507 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001508 Token::VmControlServer => {
1509 if let Some(socket_server) = &control_server_socket {
1510 match socket_server.accept() {
1511 Ok(socket) => {
1512 poll_ctx
1513 .add(
1514 &socket,
1515 Token::VmControl {
1516 index: control_sockets.len(),
1517 },
1518 )
1519 .map_err(Error::PollContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001520 control_sockets
1521 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001522 }
1523 Err(e) => error!("failed to accept socket: {}", e),
1524 }
1525 }
1526 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001527 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001528 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001529 match socket {
1530 TaggedControlSocket::Vm(socket) => match socket.recv() {
1531 Ok(request) => {
1532 let mut run_mode_opt = None;
1533 let response = request.execute(
1534 &mut run_mode_opt,
1535 &balloon_host_socket,
1536 disk_host_sockets,
1537 &usb_control_socket,
1538 );
1539 if let Err(e) = socket.send(&response) {
1540 error!("failed to send VmResponse: {}", e);
1541 }
1542 if let Some(run_mode) = run_mode_opt {
1543 info!("control socket changed run mode to {}", run_mode);
1544 match run_mode {
1545 VmRunMode::Exiting => {
1546 break 'poll;
1547 }
1548 other => {
1549 run_mode_arc.set_and_notify(other);
1550 for handle in &vcpu_handles {
1551 let _ = handle.kill(SIGRTMIN() + 0);
1552 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001553 }
1554 }
1555 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001556 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001557 Err(e) => {
1558 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1559 vm_control_indices_to_remove.push(index);
1560 } else {
1561 error!("failed to recv VmRequest: {}", e);
1562 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001563 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001564 },
1565 TaggedControlSocket::Wayland(socket) => match socket.recv() {
1566 Ok(request) => {
1567 let response =
1568 request.execute(&mut linux.vm, &mut linux.resources);
1569 if let Err(e) = socket.send(&response) {
1570 error!("failed to send WlControlResponse: {}", e);
1571 }
1572 }
1573 Err(e) => {
1574 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1575 vm_control_indices_to_remove.push(index);
1576 } else {
1577 error!("failed to recv WlControlRequest: {}", e);
1578 }
1579 }
1580 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001581 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001582 }
1583 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001584 }
1585 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001586
Zach Reizner5bed0d22018-03-28 02:31:11 -07001587 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08001588 match event.token() {
1589 Token::Exit => {}
1590 Token::Stdin => {
1591 let _ = poll_ctx.delete(&stdin_handle);
1592 }
1593 Token::ChildSignal => {}
1594 Token::CheckAvailableMemory => {}
1595 Token::LowMemory => {}
1596 Token::LowmemTimer => {}
1597 Token::VmControlServer => {}
1598 Token::VmControl { index } => {
1599 // It's possible more data is readable and buffered while the socket is hungup,
1600 // so don't delete the socket from the poll context until we're sure all the
1601 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07001602 match control_sockets
1603 .get(index)
1604 .map(|s| s.as_ref().get_readable_bytes())
1605 {
Zach Reiznera60744b2019-02-13 17:33:32 -08001606 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
1607 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
1608 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07001609 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001610 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001611 }
1612 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001613
1614 // Sort in reverse so the highest indexes are removed first. This removal algorithm
1615 // preserves correct indexes as each element is removed.
1616 vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
1617 vm_control_indices_to_remove.dedup();
1618 for index in vm_control_indices_to_remove {
1619 control_sockets.swap_remove(index);
1620 if let Some(socket) = control_sockets.get(index) {
1621 poll_ctx
1622 .add(socket, Token::VmControl { index })
1623 .map_err(Error::PollContextAdd)?;
1624 }
1625 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001626 }
1627
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001628 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
1629 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07001630 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08001631 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001632 Ok(_) => {
1633 if let Err(e) = handle.join() {
1634 error!("failed to join vcpu thread: {:?}", e);
1635 }
1636 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08001637 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08001638 }
1639 }
1640
1641 stdin_lock
1642 .set_canon_mode()
1643 .expect("failed to restore canonical mode for terminal");
1644
1645 Ok(())
1646}