blob: e7583a43937a0bc850b3028fe8f33a96db16f3d5 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07006use std::cmp::min;
Zach Reizner55a9e502018-10-03 10:22:32 -07007use std::error;
Dylan Reid059a1882018-07-23 17:58:09 -07008use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -08009use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070010use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070011use std::io::{self, stdin, Read};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070012use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080013use std::net::Ipv4Addr;
Jorge E. Moreiradffec502019-01-14 18:44:49 -080014use std::os::unix::io::{FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080015use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080016use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070017use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070018use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080019use std::thread;
20use std::thread::JoinHandle;
Daniel Prilik22006042019-01-14 14:19:04 -080021use std::time::{Duration, SystemTime, UNIX_EPOCH};
Zach Reizner39aa26b2017-12-12 18:03:23 -080022
David Tolnay41a6f842019-03-01 16:18:44 -080023use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080024
Dylan Reid3082e8e2019-01-07 10:33:48 -080025use audio_streams::DummyStreamSource;
Chirantan Ekbote448516e2018-07-24 16:07:42 -070026use byteorder::{ByteOrder, LittleEndian};
David Tolnay2b089fc2019-03-04 15:33:22 -080027use devices::virtio::{self, VirtioDevice};
Daniel Verkamp56f283b2018-10-05 11:40:59 -070028use devices::{self, PciDevice, VirtioPciDevice};
Zach Reizner39aa26b2017-12-12 18:03:23 -080029use io_jail::{self, Minijail};
Zach Reizner39aa26b2017-12-12 18:03:23 -080030use kvm::*;
paulhsiaf052cfe2019-01-22 15:22:25 +080031use libcras::CrasClient;
Zach Reiznera60744b2019-02-13 17:33:32 -080032use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080033use net_util::{Error as NetError, MacAddress, Tap};
Daniel Verkampf02fdd12018-10-10 17:25:14 -070034use qcow::{self, ImageType, QcowFile};
Daniel Prilik22006042019-01-14 14:19:04 -080035use rand_ish::SimpleRng;
Zach Reizner6a8fdd92019-01-16 14:38:41 -080036use sync::{Condvar, Mutex};
Zach Reiznera60744b2019-02-13 17:33:32 -080037use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
38use sys_util::{
Dmitry Torokhov71006072019-03-06 10:56:51 -080039 self, block_signal, clear_signal, drop_capabilities, flock, get_blocked_signals, get_group_id,
40 get_user_id, getegid, geteuid, register_signal_handler, validate_raw_fd, EventFd,
41 FlockOperation, GuestMemory, Killable, PollContext, PollToken, SignalFd, Terminal, TimerFd,
42 SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080043};
Lepton Wu60893882018-11-21 11:06:18 -080044#[cfg(feature = "gpu-forward")]
45use sys_util::{GuestAddress, MemoryMapping, Protection};
Jason D. Clinton865323d2017-09-27 22:04:03 -060046use vhost;
Zach Reizner6a8fdd92019-01-16 14:38:41 -080047use vm_control::{VmRequest, VmResponse, VmRunMode};
Zach Reizner39aa26b2017-12-12 18:03:23 -080048
Jorge E. Moreira99d3f082019-03-07 10:59:54 -080049use crate::{Config, DiskOption, TouchDeviceOption};
Zach Reizner39aa26b2017-12-12 18:03:23 -080050
Dylan Reid059a1882018-07-23 17:58:09 -070051use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents};
Sonny Raoed517d12018-02-13 22:09:43 -080052
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080053#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
54use aarch64::AArch64 as Arch;
Zach Reizner55a9e502018-10-03 10:22:32 -070055#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
56use x86_64::X8664arch as Arch;
Zach Reizner39aa26b2017-12-12 18:03:23 -080057
Lepton Wu60893882018-11-21 11:06:18 -080058#[cfg(feature = "gpu-forward")]
59extern crate render_node_forward;
60#[cfg(feature = "gpu-forward")]
61use self::render_node_forward::*;
62#[cfg(not(feature = "gpu-forward"))]
63type RenderNodeHost = ();
64
Dylan Reid059a1882018-07-23 17:58:09 -070065#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080066pub enum Error {
Lepton Wu60893882018-11-21 11:06:18 -080067 AddGpuDeviceMemory(sys_util::Error),
68 AllocateGpuDeviceAddress,
David Tolnay2b089fc2019-03-04 15:33:22 -080069 BalloonDeviceNew(virtio::BalloonError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080070 BlockDeviceNew(sys_util::Error),
Mark Ryan6ed5aea2018-04-20 13:52:35 +010071 BlockSignal(sys_util::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080072 BuildVm(<Arch as LinuxArch>::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080073 ChownTpmStorage(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080074 CloneEventFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080075 CreateCrasClient(libcras::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080076 CreateEventFd(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070077 CreatePollContext(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080078 CreateSignalFd(sys_util::SignalFdError),
79 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -080080 CreateTapDevice(NetError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -070081 CreateTimerFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080082 CreateTpmStorage(PathBuf, io::Error),
Daniel Verkampf02fdd12018-10-10 17:25:14 -070083 DetectImageType(qcow::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -080084 DeviceJail(io_jail::Error),
85 DevicePivotRoot(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080086 Disk(io::Error),
Stephen Barberc79de2d2018-02-21 14:17:27 -080087 DiskImageLock(sys_util::Error),
Dmitry Torokhov71006072019-03-06 10:56:51 -080088 DropCapabilities(sys_util::Error),
Lepton Wu39133a02019-02-27 12:42:29 -080089 InputDeviceNew(virtio::InputError),
90 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -070091 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -070092 InvalidWaylandPath,
David Tolnayfd0971d2019-03-04 17:15:57 -080093 IoJail(io_jail::Error),
Lepton Wu39133a02019-02-27 12:42:29 -080094 LoadKernel(Box<error::Error>),
David Tolnay2b089fc2019-03-04 15:33:22 -080095 NetDeviceNew(virtio::NetError),
Tristan Muntsinger4133b012018-12-21 16:01:56 -080096 OpenAndroidFstab(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -080097 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080098 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080099 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800100 P9DeviceNew(virtio::P9Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800101 PivotRootDoesntExist(&'static str),
Zach Reizner5bed0d22018-03-28 02:31:11 -0700102 PollContextAdd(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700103 PollContextDelete(sys_util::Error),
Dylan Reid88624f82018-01-11 09:20:16 -0800104 QcowDeviceCreate(qcow::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700105 ReadLowmemAvailable(io::Error),
106 ReadLowmemMargin(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700107 RegisterBalloon(arch::DeviceRegistrationError),
108 RegisterBlock(arch::DeviceRegistrationError),
109 RegisterGpu(arch::DeviceRegistrationError),
110 RegisterNet(arch::DeviceRegistrationError),
111 RegisterP9(arch::DeviceRegistrationError),
112 RegisterRng(arch::DeviceRegistrationError),
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100113 RegisterSignalHandler(sys_util::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700114 RegisterWayland(arch::DeviceRegistrationError),
Lepton Wu60893882018-11-21 11:06:18 -0800115 ReserveGpuMemory(sys_util::MmapError),
116 ReserveMemory(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700117 ResetTimerFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800118 RngDeviceNew(virtio::RngError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800119 SettingGidMap(io_jail::Error),
120 SettingUidMap(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800121 SignalFd(sys_util::SignalFdError),
122 SpawnVcpu(io::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700123 TimerFd(sys_util::Error),
Chirantan Ekbote2d292332018-11-16 11:35:24 -0800124 ValidateRawFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800125 VhostNetDeviceNew(virtio::vhost::Error),
126 VhostVsockDeviceNew(virtio::vhost::Error),
Daniel Verkamp56f283b2018-10-05 11:40:59 -0700127 VirtioPciDev(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800128 WaylandDeviceNew(sys_util::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800129}
130
David Tolnayc69f9752019-03-01 18:07:56 -0800131impl Display for Error {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800132 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800133 use self::Error::*;
134
Zach Reizner39aa26b2017-12-12 18:03:23 -0800135 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800136 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
137 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
David Tolnayc69f9752019-03-01 18:07:56 -0800138 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
139 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
140 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800141 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800142 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800143 CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800144 CreateCrasClient(e) => write!(f, "failed to create cras client: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800145 CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
146 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
147 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
148 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
149 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
150 CreateTimerFd(e) => write!(f, "failed to create timerfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800151 CreateTpmStorage(p, e) => {
152 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
153 }
David Tolnayc69f9752019-03-01 18:07:56 -0800154 DetectImageType(e) => write!(f, "failed to detect disk image type: {}", e),
155 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
156 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
157 Disk(e) => write!(f, "failed to load disk image: {}", e),
158 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800159 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800160 InputDeviceNew(ref e) => write!(f, "failed to set up input device: {}", e),
161 InputEventsOpen(ref e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800162 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
163 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800164 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800165 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800166 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800167 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
168 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800169 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800170 f,
171 "failed to open android fstab file {}: {}",
172 p.display(),
173 e
174 ),
David Tolnayfd0971d2019-03-04 17:15:57 -0800175 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800176 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800177 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
David Tolnayc69f9752019-03-01 18:07:56 -0800178 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
179 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
180 QcowDeviceCreate(e) => write!(f, "failed to read qcow formatted file {}", e),
181 ReadLowmemAvailable(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700182 f,
183 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
184 e
185 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800186 ReadLowmemMargin(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700187 f,
188 "failed to read /sys/kernel/mm/chromeos-low_mem/margin: {}",
189 e
190 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800191 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
192 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
193 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
194 RegisterNet(e) => write!(f, "error registering net device: {}", e),
195 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
196 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
197 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
198 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800199 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
200 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800201 ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
202 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800203 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
204 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
205 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
206 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
207 TimerFd(e) => write!(f, "failed to read timer fd: {}", e),
208 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
209 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
210 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
211 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
212 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800213 }
214 }
215}
216
David Tolnayfd0971d2019-03-04 17:15:57 -0800217impl From<io_jail::Error> for Error {
218 fn from(err: io_jail::Error) -> Self {
219 Error::IoJail(err)
220 }
221}
222
// `Error` already implements `Debug` (derived) and `Display`; no trait methods are
// overridden here, so the defaults provided by `std::error::Error` are used.
impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700224
/// Shorthand for a `std::result::Result` whose error type is this file's `Error`.
type Result<T> = std::result::Result<T, Error>;
226
Zach Reizner39aa26b2017-12-12 18:03:23 -0800227fn create_base_minijail(root: &Path, seccomp_policy: &Path) -> Result<Minijail> {
228 // All child jails run in a new user namespace without any users mapped,
229 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800230 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800231 j.namespace_pids();
232 j.namespace_user();
233 j.namespace_user_disable_setgroups();
234 // Don't need any capabilities.
235 j.use_caps(0);
236 // Create a new mount namespace with an empty root FS.
237 j.namespace_vfs();
David Tolnay5bbbf612018-12-01 17:49:30 -0800238 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800239 // Run in an empty network namespace.
240 j.namespace_net();
241 // Apply the block device seccomp policy.
242 j.no_new_privs();
Stephen Barber3b1d8a52018-01-06 17:34:51 -0800243 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
244 // the entire device process if a worker thread commits a seccomp violation.
245 j.set_seccomp_filter_tsync();
Zach Reizner043ddc52018-04-03 20:47:21 -0700246 #[cfg(debug_assertions)]
247 j.log_seccomp_filter_failures();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800248 j.parse_seccomp_filters(seccomp_policy)
David Tolnay5bbbf612018-12-01 17:49:30 -0800249 .map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800250 j.use_seccomp_filter();
251 // Don't do init setup.
252 j.run_as_init();
253 Ok(j)
254}
255
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800256fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700257 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800258 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
259 // A directory for a jailed device's pivot root.
260 let root_path = Path::new(pivot_root);
261 if !root_path.exists() {
262 return Err(Error::PivotRootDoesntExist(pivot_root));
263 }
264 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
265 Ok(Some(create_base_minijail(root_path, &policy_path)?))
266 } else {
267 Ok(None)
268 }
269}
270
/// Result type used by the device constructors in this file; the success type defaults
/// to `VirtioDeviceStub` and the error type is this file's `Error`.
type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800272
273fn create_block_device(
274 cfg: &Config,
275 disk: &DiskOption,
276 disk_device_socket: UnixSeqpacket,
277) -> DeviceResult {
278 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
279 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
280 // Safe because we will validate |raw_fd|.
281 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
282 } else {
283 OpenOptions::new()
284 .read(true)
285 .write(!disk.read_only)
286 .open(&disk.path)
287 .map_err(Error::Disk)?
288 };
289 // Lock the disk image to prevent other crosvm instances from using it.
290 let lock_op = if disk.read_only {
291 FlockOperation::LockShared
292 } else {
293 FlockOperation::LockExclusive
294 };
295 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
296
297 let image_type = qcow::detect_image_type(&raw_image).map_err(Error::DetectImageType)?;
298 let dev = match image_type {
299 ImageType::Raw => {
300 // Access as a raw block device.
301 let dev = virtio::Block::new(raw_image, disk.read_only, Some(disk_device_socket))
302 .map_err(Error::BlockDeviceNew)?;
303 Box::new(dev) as Box<VirtioDevice>
304 }
305 ImageType::Qcow2 => {
306 // Valid qcow header present
307 let qcow_image = QcowFile::from(raw_image).map_err(Error::QcowDeviceCreate)?;
308 let dev = virtio::Block::new(qcow_image, disk.read_only, Some(disk_device_socket))
309 .map_err(Error::BlockDeviceNew)?;
310 Box::new(dev) as Box<VirtioDevice>
311 }
312 };
313
314 Ok(VirtioDeviceStub {
315 dev,
316 jail: simple_jail(&cfg, "block_device.policy")?,
317 })
318}
319
320fn create_rng_device(cfg: &Config) -> DeviceResult {
321 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
322
323 Ok(VirtioDeviceStub {
324 dev: Box::new(dev),
325 jail: simple_jail(&cfg, "rng_device.policy")?,
326 })
327}
328
/// Creates the virtio tpm device and decides where its storage lives.
///
/// When a jail is in use, a per-process directory `/run/vm/tpm.<pid>` is created,
/// chowned to the ids returned by `add_crosvm_user_to_jail`, and bind mounted into the
/// jail. Without a jail the storage path is `/tmp/tpm-simulator`.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;
    use sys_util::chown;

    let mut tpm_jail = simple_jail(cfg, "tpm_device.policy")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            let mount_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                mount_flags,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = PathBuf::from(&tpm_pid_dir);
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;

            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => PathBuf::from("/tmp/tpm-simulator"),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
377
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800378fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
379 let socket = create_input_socket(&single_touch_spec.path).map_err(|e| {
380 error!("failed configuring virtio single touch: {:?}", e);
381 e
382 })?;
383
384 let dev = virtio::new_single_touch(socket, single_touch_spec.width, single_touch_spec.height)
385 .map_err(Error::InputDeviceNew)?;
386 Ok(VirtioDeviceStub {
387 dev: Box::new(dev),
388 jail: simple_jail(&cfg, "input_device.policy")?,
389 })
390}
391
392fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800393 let socket = create_input_socket(&trackpad_spec.path).map_err(|e| {
394 error!("failed configuring virtio trackpad: {}", e);
395 e
396 })?;
397
398 let dev = virtio::new_trackpad(socket, trackpad_spec.width, trackpad_spec.height)
399 .map_err(Error::InputDeviceNew)?;
400
401 Ok(VirtioDeviceStub {
402 dev: Box::new(dev),
403 jail: simple_jail(&cfg, "input_device.policy")?,
404 })
405}
406
407fn create_mouse_device(cfg: &Config, mouse_socket: &Path) -> DeviceResult {
408 let socket = create_input_socket(&mouse_socket).map_err(|e| {
409 error!("failed configuring virtio mouse: {}", e);
410 e
411 })?;
412
413 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
414
415 Ok(VirtioDeviceStub {
416 dev: Box::new(dev),
417 jail: simple_jail(&cfg, "input_device.policy")?,
418 })
419}
420
421fn create_keyboard_device(cfg: &Config, keyboard_socket: &Path) -> DeviceResult {
422 let socket = create_input_socket(&keyboard_socket).map_err(|e| {
423 error!("failed configuring virtio keyboard: {}", e);
424 e
425 })?;
426
427 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
428
429 Ok(VirtioDeviceStub {
430 dev: Box::new(dev),
431 jail: simple_jail(&cfg, "input_device.policy")?,
432 })
433}
434
435fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
436 let dev_file = OpenOptions::new()
437 .read(true)
438 .write(true)
439 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800440 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800441
442 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
443
444 Ok(VirtioDeviceStub {
445 dev: Box::new(dev),
446 jail: simple_jail(&cfg, "input_device.policy")?,
447 })
448}
449
450fn create_balloon_device(cfg: &Config, socket: UnixSeqpacket) -> DeviceResult {
451 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
452
453 Ok(VirtioDeviceStub {
454 dev: Box::new(dev),
455 jail: simple_jail(&cfg, "balloon_device.policy")?,
456 })
457}
458
459fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
460 // Safe because we ensure that we get a unique handle to the fd.
461 let tap = unsafe {
462 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
463 .map_err(Error::CreateTapDevice)?
464 };
465
466 let dev = virtio::Net::from(tap).map_err(Error::NetDeviceNew)?;
467
468 Ok(VirtioDeviceStub {
469 dev: Box::new(dev),
470 jail: simple_jail(&cfg, "net_device.policy")?,
471 })
472}
473
474fn create_net_device(
475 cfg: &Config,
476 host_ip: Ipv4Addr,
477 netmask: Ipv4Addr,
478 mac_address: MacAddress,
479 mem: &GuestMemory,
480) -> DeviceResult {
481 let dev = if cfg.vhost_net {
482 let dev =
483 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
484 .map_err(Error::VhostNetDeviceNew)?;
485 Box::new(dev) as Box<VirtioDevice>
486 } else {
487 let dev =
488 virtio::Net::<Tap>::new(host_ip, netmask, mac_address).map_err(Error::NetDeviceNew)?;
489 Box::new(dev) as Box<VirtioDevice>
490 };
491
492 let policy = if cfg.vhost_net {
493 "vhost_net_device.policy"
494 } else {
495 "net_device.policy"
496 };
497
498 Ok(VirtioDeviceStub {
499 dev,
500 jail: simple_jail(&cfg, policy)?,
501 })
502}
503
/// Creates the virtio gpu device.
///
/// When sandboxing is enabled the device is told to use the wayland socket at
/// `/wayland-0`, and the real `wayland_socket_path` is bind mounted there inside the
/// jail; otherwise the device uses `wayland_socket_path` directly. The device also
/// receives a clone of `exit_evt` and the `gpu_socket`.
#[cfg(feature = "gpu")]
fn create_gpu_device(
    cfg: &Config,
    exit_evt: &EventFd,
    gpu_socket: virtio::resource_bridge::ResourceResponseSocket,
    wayland_socket_path: &Path,
) -> DeviceResult {
    let jailed_wayland_path = Path::new("/wayland-0");
    let device_wayland_path = if cfg.sandbox {
        jailed_wayland_path
    } else {
        wayland_socket_path
    };

    let gpu = virtio::Gpu::new(
        exit_evt.try_clone().map_err(Error::CloneEventFd)?,
        Some(gpu_socket),
        device_wayland_path,
    );

    let mut jail = simple_jail(cfg, "gpu_device.policy")?;
    if let Some(jail) = jail.as_mut() {
        // Create a tmpfs in the device's root directory so that we can bind mount the
        // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
        let mount_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            mount_flags,
            "size=67108864",
        )?;

        // Bind mounts made with the writable flag set to false, in order: the device
        // nodes required for DRM, then the libraries that are required when mesa
        // drivers are dynamically loaded.
        let readonly_binds = [
            "/sys/dev/char",
            "/sys/devices",
            "/dev/dri",
            "/lib64",
            "/usr/lib64",
        ];
        for dir in readonly_binds.iter() {
            let dir = Path::new(dir);
            jail.mount_bind(dir, dir, false)?;
        }

        // Bind mount the wayland socket into jail's root. This is necessary since each
        // new wayland context must open() the socket.
        jail.mount_bind(wayland_socket_path, jailed_wayland_path, true)?;

        add_crosvm_user_to_jail(jail, "gpu")?;
    }

    Ok(VirtioDeviceStub {
        dev: Box::new(gpu),
        jail,
    })
}
565
566fn create_wayland_device(
567 cfg: &Config,
568 socket_path: &Path,
569 socket: UnixSeqpacket,
570 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
571) -> DeviceResult {
572 let wayland_socket_dir = socket_path.parent().ok_or(Error::InvalidWaylandPath)?;
573 let wayland_socket_name = socket_path.file_name().ok_or(Error::InvalidWaylandPath)?;
574 let jailed_wayland_dir = Path::new("/wayland");
575 let jailed_wayland_path = jailed_wayland_dir.join(wayland_socket_name);
576
577 let dev = virtio::Wl::new(
Lepton Wu9105e9f2019-03-14 11:38:31 -0700578 if cfg.sandbox {
David Tolnay2b089fc2019-03-04 15:33:22 -0800579 &jailed_wayland_path
580 } else {
581 socket_path
582 },
583 socket,
584 resource_bridge,
585 )
586 .map_err(Error::WaylandDeviceNew)?;
587
588 let jail = match simple_jail(&cfg, "wl_device.policy")? {
589 Some(mut jail) => {
590 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
591 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
592 jail.mount_with_data(
593 Path::new("none"),
594 Path::new("/"),
595 "tmpfs",
596 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
597 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800598 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800599
600 // Bind mount the wayland socket's directory into jail's root. This is necessary since
601 // each new wayland context must open() the socket. If the wayland socket is ever
602 // destroyed and remade in the same host directory, new connections will be possible
603 // without restarting the wayland device.
David Tolnayfd0971d2019-03-04 17:15:57 -0800604 jail.mount_bind(wayland_socket_dir, jailed_wayland_dir, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800605
606 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
607
608 Some(jail)
609 }
610 None => None,
611 };
612
613 Ok(VirtioDeviceStub {
614 dev: Box::new(dev),
615 jail,
616 })
617}
618
619fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
620 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
621
622 Ok(VirtioDeviceStub {
623 dev: Box::new(dev),
624 jail: simple_jail(&cfg, "vhost_vsock_device.policy")?,
625 })
626}
627
628fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult {
629 let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? {
630 Some(mut jail) => {
631 // The shared directory becomes the root of the device's file system.
632 let root = Path::new("/");
David Tolnayfd0971d2019-03-04 17:15:57 -0800633 jail.mount_bind(src, root, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800634
635 // Set the uid/gid for the jailed process, and give a basic id map. This
636 // is required for the above bind mount to work.
637 jail.change_uid(chronos.uid);
638 jail.change_gid(chronos.gid);
639 jail.uidmap(&format!("{0} {0} 1", chronos.uid))
640 .map_err(Error::SettingUidMap)?;
641 jail.gidmap(&format!("{0} {0} 1", chronos.gid))
642 .map_err(Error::SettingGidMap)?;
643
644 (Some(jail), root)
645 }
646 None => {
647 // There's no bind mount so we tell the server to treat the source directory as the
David Tolnay9deb7d72019-03-05 18:25:44 -0800648 // root.
David Tolnay2b089fc2019-03-04 15:33:22 -0800649 (None, src)
650 }
651 };
652
653 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
654
655 Ok(VirtioDeviceStub {
656 dev: Box::new(dev),
657 jail,
658 })
659}
660
661fn create_virtio_devices(
662 cfg: &Config,
Zach Reizner55a9e502018-10-03 10:22:32 -0700663 mem: &GuestMemory,
664 _exit_evt: &EventFd,
Zach Reiznera60744b2019-02-13 17:33:32 -0800665 wayland_device_socket: UnixSeqpacket,
666 balloon_device_socket: UnixSeqpacket,
667 disk_device_sockets: &mut Vec<UnixSeqpacket>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800668) -> DeviceResult<Vec<VirtioDeviceStub>> {
Dylan Reid059a1882018-07-23 17:58:09 -0700669 let mut devs = Vec::new();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800670
Zach Reizner8fb52112017-12-13 16:04:39 -0800671 for disk in &cfg.disks {
Daniel Verkamp92f73d72018-12-04 13:17:46 -0800672 let disk_device_socket = disk_device_sockets.remove(0);
David Tolnay2b089fc2019-03-04 15:33:22 -0800673 devs.push(create_block_device(cfg, disk, disk_device_socket)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800674 }
675
David Tolnay2b089fc2019-03-04 15:33:22 -0800676 devs.push(create_rng_device(cfg)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800677
David Tolnayde6b29a2018-12-20 11:49:46 -0800678 #[cfg(feature = "tpm")]
679 {
David Tolnay43f8e212019-02-13 17:28:16 -0800680 if cfg.software_tpm {
David Tolnay2b089fc2019-03-04 15:33:22 -0800681 devs.push(create_tpm_device(cfg)?);
David Tolnay43f8e212019-02-13 17:28:16 -0800682 }
David Tolnayde6b29a2018-12-20 11:49:46 -0800683 }
684
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800685 if let Some(single_touch_spec) = &cfg.virtio_single_touch {
686 devs.push(create_single_touch_device(cfg, single_touch_spec)?);
687 }
688
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800689 if let Some(trackpad_spec) = &cfg.virtio_trackpad {
David Tolnay2b089fc2019-03-04 15:33:22 -0800690 devs.push(create_trackpad_device(cfg, trackpad_spec)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800691 }
692
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800693 if let Some(mouse_socket) = &cfg.virtio_mouse {
David Tolnay2b089fc2019-03-04 15:33:22 -0800694 devs.push(create_mouse_device(cfg, mouse_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800695 }
696
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800697 if let Some(keyboard_socket) = &cfg.virtio_keyboard {
David Tolnay2b089fc2019-03-04 15:33:22 -0800698 devs.push(create_keyboard_device(cfg, keyboard_socket)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800699 }
700
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800701 for dev_path in &cfg.virtio_input_evdevs {
David Tolnay2b089fc2019-03-04 15:33:22 -0800702 devs.push(create_vinput_device(cfg, dev_path)?);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800703 }
704
David Tolnay2b089fc2019-03-04 15:33:22 -0800705 devs.push(create_balloon_device(cfg, balloon_device_socket)?);
Dylan Reid295ccac2017-11-06 14:06:24 -0800706
Zach Reizner39aa26b2017-12-12 18:03:23 -0800707 // We checked above that if the IP is defined, then the netmask is, too.
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800708 for tap_fd in &cfg.tap_fd {
David Tolnay2b089fc2019-03-04 15:33:22 -0800709 devs.push(create_tap_net_device(cfg, *tap_fd)?);
Jorge E. Moreirab7952802019-02-12 16:43:05 -0800710 }
711
David Tolnay2b089fc2019-03-04 15:33:22 -0800712 if let (Some(host_ip), Some(netmask), Some(mac_address)) =
713 (cfg.host_ip, cfg.netmask, cfg.mac_address)
714 {
715 devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800716 }
717
David Tolnayfa701712019-02-13 16:42:54 -0800718 #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
David Tolnay2b089fc2019-03-04 15:33:22 -0800719 let mut resource_bridge_wl_socket = None::<virtio::resource_bridge::ResourceRequestSocket>;
David Tolnayfa701712019-02-13 16:42:54 -0800720
Zach Reizner3a8100a2017-09-13 19:15:43 -0700721 #[cfg(feature = "gpu")]
722 {
723 if cfg.gpu {
David Tolnay2b089fc2019-03-04 15:33:22 -0800724 if let Some(wayland_socket_path) = &cfg.wayland_socket_path {
Zach Reizneraa575662018-08-15 10:46:32 -0700725 let (wl_socket, gpu_socket) =
David Tolnay2b089fc2019-03-04 15:33:22 -0800726 virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
Zach Reizneraa575662018-08-15 10:46:32 -0700727 resource_bridge_wl_socket = Some(wl_socket);
728
David Tolnay2b089fc2019-03-04 15:33:22 -0800729 devs.push(create_gpu_device(
730 cfg,
731 _exit_evt,
732 gpu_socket,
733 wayland_socket_path,
734 )?);
David Rileyb22b6132018-08-20 08:11:42 -0700735 }
Zach Reizner3a8100a2017-09-13 19:15:43 -0700736 }
737 }
738
Zach Reizneraa575662018-08-15 10:46:32 -0700739 if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() {
David Tolnay2b089fc2019-03-04 15:33:22 -0800740 devs.push(create_wayland_device(
741 cfg,
742 wayland_socket_path,
743 wayland_device_socket,
744 resource_bridge_wl_socket,
745 )?);
Zach Reizneraa575662018-08-15 10:46:32 -0700746 }
747
748 if let Some(cid) = cfg.cid {
David Tolnay2b089fc2019-03-04 15:33:22 -0800749 devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
Zach Reizneraa575662018-08-15 10:46:32 -0700750 }
751
David Tolnayfd0971d2019-03-04 17:15:57 -0800752 let chronos = get_chronos_ids();
David Tolnay2b089fc2019-03-04 15:33:22 -0800753
754 for (src, tag) in &cfg.shared_dirs {
755 devs.push(create_9p_device(cfg, chronos, src, tag)?);
756 }
757
758 Ok(devs)
759}
760
761fn create_devices(
762 cfg: Config,
763 mem: &GuestMemory,
764 exit_evt: &EventFd,
765 wayland_device_socket: UnixSeqpacket,
766 balloon_device_socket: UnixSeqpacket,
767 disk_device_sockets: &mut Vec<UnixSeqpacket>,
768) -> DeviceResult<Vec<(Box<PciDevice>, Option<Minijail>)>> {
769 let stubs = create_virtio_devices(
770 &cfg,
771 mem,
772 exit_evt,
773 wayland_device_socket,
774 balloon_device_socket,
775 disk_device_sockets,
776 )?;
777
778 let mut pci_devices = Vec::new();
779
780 for stub in stubs {
781 let dev = VirtioPciDevice::new(mem.clone(), stub.dev).map_err(Error::VirtioPciDev)?;
782 let dev = Box::new(dev) as Box<PciDevice>;
783 pci_devices.push((dev, stub.jail));
784 }
785
786 if cfg.cras_audio {
David Tolnayfd0971d2019-03-04 17:15:57 -0800787 let server = Box::new(CrasClient::new().map_err(Error::CreateCrasClient)?);
David Tolnay2b089fc2019-03-04 15:33:22 -0800788 let cras_audio = devices::Ac97Dev::new(mem.clone(), server);
789
790 pci_devices.push((
791 Box::new(cras_audio),
792 simple_jail(&cfg, "cras_audio_device.policy")?,
793 ));
794 }
795
796 if cfg.null_audio {
797 let server = Box::new(DummyStreamSource::new());
798 let null_audio = devices::Ac97Dev::new(mem.clone(), server);
799
800 pci_devices.push((
801 Box::new(null_audio),
802 simple_jail(&cfg, "null_audio_device.policy")?,
803 ));
804 }
805
806 Ok(pci_devices)
807}
808
// A user id / group id pair. Values of this type are produced by `get_chronos_ids` and
// `add_crosvm_user_to_jail` and are used to set the uid/gid of a jailed device process.
#[derive(Copy, Clone)]
struct Ids {
    // Numeric user id.
    uid: uid_t,
    // Numeric group id.
    gid: gid_t,
}
814
David Tolnayfd0971d2019-03-04 17:15:57 -0800815fn get_chronos_ids() -> Ids {
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700816 let chronos_user_group = CStr::from_bytes_with_nul(b"chronos\0").unwrap();
David Tolnay2b089fc2019-03-04 15:33:22 -0800817
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700818 let chronos_uid = match get_user_id(&chronos_user_group) {
819 Ok(u) => u,
820 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -0800821 warn!("falling back to current user id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700822 geteuid()
823 }
824 };
David Tolnay2b089fc2019-03-04 15:33:22 -0800825
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700826 let chronos_gid = match get_group_id(&chronos_user_group) {
827 Ok(u) => u,
828 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -0800829 warn!("falling back to current group id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -0700830 getegid()
831 }
832 };
833
David Tolnayfd0971d2019-03-04 17:15:57 -0800834 Ids {
David Tolnay2b089fc2019-03-04 15:33:22 -0800835 uid: chronos_uid,
836 gid: chronos_gid,
David Tolnayfd0971d2019-03-04 17:15:57 -0800837 }
David Tolnay41a6f842019-03-01 16:18:44 -0800838}
839
David Tolnay48c48292019-03-01 16:54:25 -0800840// Set the uid/gid for the jailed process and give a basic id map. This is
841// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -0800842fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -0800843 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
844
845 let crosvm_uid = match get_user_id(&crosvm_user_group) {
846 Ok(u) => u,
847 Err(e) => {
848 warn!("falling back to current user id for {}: {}", feature, e);
849 geteuid()
850 }
851 };
852
853 let crosvm_gid = match get_group_id(&crosvm_user_group) {
854 Ok(u) => u,
855 Err(e) => {
856 warn!("falling back to current group id for {}: {}", feature, e);
857 getegid()
858 }
859 };
860
861 jail.change_uid(crosvm_uid);
862 jail.change_gid(crosvm_gid);
863 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
864 .map_err(Error::SettingUidMap)?;
865 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
866 .map_err(Error::SettingGidMap)?;
867
David Tolnay41a6f842019-03-01 16:18:44 -0800868 Ok(Ids {
869 uid: crosvm_uid,
870 gid: crosvm_gid,
871 })
David Tolnay48c48292019-03-01 16:54:25 -0800872}
873
David Tolnayfd0971d2019-03-04 17:15:57 -0800874fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800875 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -0800876 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800877 }
878 let raw_fd = path
879 .file_name()
880 .and_then(|fd_osstr| fd_osstr.to_str())
881 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
882 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -0800883 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800884}
885
David Tolnayfd0971d2019-03-04 17:15:57 -0800886fn create_input_socket(path: &Path) -> Result<UnixStream> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800887 if path.parent() == Some(Path::new("/proc/self/fd")) {
888 // Safe because we will validate |raw_fd|.
889 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(path)?)) }
890 } else {
David Tolnayfd0971d2019-03-04 17:15:57 -0800891 UnixStream::connect(path).map_err(Error::InputEventsOpen)
Jorge E. Moreiradffec502019-01-14 18:44:49 -0800892 }
893}
894
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100895fn setup_vcpu_signal_handler() -> Result<()> {
896 unsafe {
897 extern "C" fn handle_signal() {}
898 // Our signal handler does nothing and is trivially async signal safe.
899 register_signal_handler(SIGRTMIN() + 0, handle_signal)
900 .map_err(Error::RegisterSignalHandler)?;
901 }
902 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
903 Ok(())
904}
905
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800906#[derive(Default)]
907struct VcpuRunMode {
908 mtx: Mutex<VmRunMode>,
909 cvar: Condvar,
910}
911
912impl VcpuRunMode {
913 fn set_and_notify(&self, new_mode: VmRunMode) {
914 *self.mtx.lock() = new_mode;
915 self.cvar.notify_all();
916 }
917}
918
Zach Reizner55a9e502018-10-03 10:22:32 -0700919fn run_vcpu(
920 vcpu: Vcpu,
921 cpu_id: u32,
922 start_barrier: Arc<Barrier>,
923 io_bus: devices::Bus,
924 mmio_bus: devices::Bus,
925 exit_evt: EventFd,
Zach Reizner795355a2019-01-16 17:37:57 -0800926 requires_kvmclock_ctrl: bool,
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800927 run_mode_arc: Arc<VcpuRunMode>,
Zach Reizner55a9e502018-10-03 10:22:32 -0700928) -> Result<JoinHandle<()>> {
Zach Reizner8fb52112017-12-13 16:04:39 -0800929 thread::Builder::new()
930 .name(format!("crosvm_vcpu{}", cpu_id))
931 .spawn(move || {
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100932 let mut sig_ok = true;
933 match get_blocked_signals() {
934 Ok(mut v) => {
935 v.retain(|&x| x != SIGRTMIN() + 0);
936 if let Err(e) = vcpu.set_signal_mask(&v) {
937 error!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800938 "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100939 cpu_id, e
940 );
941 sig_ok = false;
942 }
943 }
944 Err(e) => {
945 error!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800946 "Failed to retrieve signal mask for vcpu {} : {}",
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100947 cpu_id, e
948 );
949 sig_ok = false;
950 }
951 };
Zach Reizner39aa26b2017-12-12 18:03:23 -0800952
Zach Reizner8fb52112017-12-13 16:04:39 -0800953 start_barrier.wait();
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100954
David Tolnay8f3a2322018-11-30 17:11:35 -0800955 if sig_ok {
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800956 'vcpu_loop: loop {
957 let mut interrupted_by_signal = false;
David Tolnay8f3a2322018-11-30 17:11:35 -0800958 match vcpu.run() {
959 Ok(VcpuExit::IoIn { port, mut size }) => {
960 let mut data = [0; 8];
961 if size > data.len() {
962 error!("unsupported IoIn size of {} bytes", size);
963 size = data.len();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800964 }
David Tolnay8f3a2322018-11-30 17:11:35 -0800965 io_bus.read(port as u64, &mut data[..size]);
966 if let Err(e) = vcpu.set_data(&data[..size]) {
David Tolnayb4bd00f2019-02-12 17:51:26 -0800967 error!("failed to set return data for IoIn: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800968 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800969 }
David Tolnay8f3a2322018-11-30 17:11:35 -0800970 Ok(VcpuExit::IoOut {
971 port,
972 mut size,
973 data,
974 }) => {
975 if size > data.len() {
976 error!("unsupported IoOut size of {} bytes", size);
977 size = data.len();
978 }
979 io_bus.write(port as u64, &data[..size]);
980 }
981 Ok(VcpuExit::MmioRead { address, size }) => {
982 let mut data = [0; 8];
983 mmio_bus.read(address, &mut data[..size]);
984 // Setting data for mmio can not fail.
985 let _ = vcpu.set_data(&data[..size]);
986 }
987 Ok(VcpuExit::MmioWrite {
988 address,
989 size,
990 data,
991 }) => {
992 mmio_bus.write(address, &data[..size]);
993 }
994 Ok(VcpuExit::Hlt) => break,
995 Ok(VcpuExit::Shutdown) => break,
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800996 Ok(VcpuExit::SystemEvent(_, _)) => break,
David Tolnay8f3a2322018-11-30 17:11:35 -0800997 Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
998 Err(e) => match e.errno() {
Zach Reizner6a8fdd92019-01-16 14:38:41 -0800999 libc::EINTR => interrupted_by_signal = true,
1000 libc::EAGAIN => {}
David Tolnay8f3a2322018-11-30 17:11:35 -08001001 _ => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001002 error!("vcpu hit unknown error: {}", e);
David Tolnay8f3a2322018-11-30 17:11:35 -08001003 break;
1004 }
1005 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001006 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001007
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001008 if interrupted_by_signal {
1009 // Try to clear the signal that we use to kick VCPU if it is pending before
1010 // attempting to handle pause requests.
1011 if let Err(e) = clear_signal(SIGRTMIN() + 0) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001012 error!("failed to clear pending signal: {}", e);
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001013 break;
1014 }
1015 let mut run_mode_lock = run_mode_arc.mtx.lock();
1016 loop {
1017 match *run_mode_lock {
1018 VmRunMode::Running => break,
Zach Reizner795355a2019-01-16 17:37:57 -08001019 VmRunMode::Suspending => {
1020 // On KVM implementations that use a paravirtualized clock (e.g.
1021 // x86), a flag must be set to indicate to the guest kernel that
1022 // a VCPU was suspended. The guest kernel will use this flag to
1023 // prevent the soft lockup detection from triggering when this
1024 // VCPU resumes, which could happen days later in realtime.
1025 if requires_kvmclock_ctrl {
1026 if let Err(e) = vcpu.kvmclock_ctrl() {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001027 error!("failed to signal to kvm that vcpu {} is being suspended: {}", cpu_id, e);
Zach Reizner795355a2019-01-16 17:37:57 -08001028 }
1029 }
1030 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001031 VmRunMode::Exiting => break 'vcpu_loop,
1032 }
1033 // Give ownership of our exclusive lock to the condition variable that
1034 // will block. When the condition variable is notified, `wait` will
1035 // unblock and return a new exclusive lock.
1036 run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
1037 }
1038 }
David Tolnay8f3a2322018-11-30 17:11:35 -08001039 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001040 }
Zach Reizner8fb52112017-12-13 16:04:39 -08001041 exit_evt
Zach Reizner39aa26b2017-12-12 18:03:23 -08001042 .write(1)
1043 .expect("failed to signal vcpu exit eventfd");
David Tolnay2bac1e72018-12-12 14:33:42 -08001044 })
1045 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -08001046}
1047
// Reads the contents of a file and converts them into a u64.
//
// At most the first 32 bytes of the file are considered. They must be valid UTF-8 and, once
// surrounding whitespace is trimmed, a decimal `u64`; otherwise an error of kind
// `InvalidData` is returned. Errors from opening or reading the file are returned unchanged.
fn file_to_u64<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    let mut file = File::open(path)?;

    // A single `read` call may return fewer bytes than are available or fail with
    // `Interrupted`, so keep reading until the buffer is full or end-of-file is reached.
    let mut buf = [0u8; 32];
    let mut count = 0;
    while count < buf.len() {
        match file.read(&mut buf[count..]) {
            Ok(0) => break,
            Ok(n) => count += n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }

    let content =
        str::from_utf8(&buf[..count]).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
    content
        .trim()
        .parse()
        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}
1062
Dylan Reid059a1882018-07-23 17:58:09 -07001063pub fn run_config(cfg: Config) -> Result<()> {
Lepton Wu9105e9f2019-03-14 11:38:31 -07001064 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001065 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1066 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1067 // access to those files will not be possible.
1068 info!("crosvm entering multiprocess mode");
1069 }
1070
Dylan Reid059a1882018-07-23 17:58:09 -07001071 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1072 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1073 // quickly.
1074 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1075
David Tolnay2b089fc2019-03-04 15:33:22 -08001076 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1077 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001078 } else {
1079 None
1080 };
1081
Dylan Reid059a1882018-07-23 17:58:09 -07001082 let components = VmComponents {
Dylan Reid059a1882018-07-23 17:58:09 -07001083 memory_mb: (cfg.memory.unwrap_or(256) << 20) as u64,
1084 vcpu_count: cfg.vcpu_count.unwrap_or(1),
David Tolnay2b089fc2019-03-04 15:33:22 -08001085 kernel_image: File::open(&cfg.kernel_path)
Dylan Reid059a1882018-07-23 17:58:09 -07001086 .map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001087 android_fstab: cfg
1088 .android_fstab
1089 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001090 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001091 .map_or(Ok(None), |v| v.map(Some))?,
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001092 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001093 extra_kernel_params: cfg.params.clone(),
1094 wayland_dmabuf: cfg.wayland_dmabuf,
Dylan Reid059a1882018-07-23 17:58:09 -07001095 };
1096
Zach Reiznera60744b2019-02-13 17:33:32 -08001097 let control_server_socket = match &cfg.socket_path {
1098 Some(path) => Some(UnlinkUnixSeqpacketListener(
1099 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1100 )),
1101 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001102 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001103
1104 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001105 let (wayland_host_socket, wayland_device_socket) =
Zach Reiznera60744b2019-02-13 17:33:32 -08001106 UnixSeqpacket::pair().map_err(Error::CreateSocket)?;
1107 control_sockets.push(MsgSocket::<VmResponse, VmRequest>::new(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001108 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001109 let (balloon_host_socket, balloon_device_socket) =
Zach Reiznera60744b2019-02-13 17:33:32 -08001110 UnixSeqpacket::pair().map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001111
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001112 // Create one control socket per disk.
1113 let mut disk_device_sockets = Vec::new();
1114 let mut disk_host_sockets = Vec::new();
1115 let disk_count = cfg.disks.len();
1116 for _ in 0..disk_count {
1117 let (disk_host_socket, disk_device_socket) =
Zach Reiznera60744b2019-02-13 17:33:32 -08001118 UnixSeqpacket::pair().map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001119 disk_device_sockets.push(disk_device_socket);
1120 let disk_host_socket = MsgSocket::<VmRequest, VmResponse>::new(disk_host_socket);
1121 disk_host_sockets.push(disk_host_socket);
1122 }
1123
Miriam Zimmerman26ac9282019-01-29 21:21:48 -08001124 let linux = Arch::build_vm(components, cfg.split_irqchip, |m, e| {
Jianxun Zhang96f2d8e2019-02-20 13:50:42 -08001125 create_devices(
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001126 cfg,
1127 m,
1128 e,
1129 wayland_device_socket,
1130 balloon_device_socket,
1131 &mut disk_device_sockets,
1132 )
David Tolnay2bac1e72018-12-12 14:33:42 -08001133 })
David Tolnaybe034262019-03-04 17:48:36 -08001134 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08001135
1136 let _render_node_host = ();
1137 #[cfg(feature = "gpu-forward")]
1138 let (_render_node_host, linux) = {
1139 // Rebinds linux as mutable.
1140 let mut linux = linux;
1141
1142 // Reserve memory range for GPU buffer allocation in advance to bypass region count
1143 // limitation. We use mremap/MAP_FIXED later to make sure GPU buffers fall into this range.
1144 let gpu_mmap =
1145 MemoryMapping::new_protection(RENDER_NODE_HOST_SIZE as usize, Protection::none())
1146 .map_err(Error::ReserveGpuMemory)?;
1147
1148 // Put the non-accessible memory map into device memory so that no other devices use that
1149 // guest address space.
1150 let gpu_addr = linux
1151 .resources
1152 .allocate_device_addresses(RENDER_NODE_HOST_SIZE)
1153 .ok_or(Error::AllocateGpuDeviceAddress)?;
1154
1155 let host = RenderNodeHost::start(&gpu_mmap, gpu_addr, linux.vm.get_memory().clone());
1156
1157 // Makes the gpu memory accessible at allocated address.
1158 linux
1159 .vm
1160 .add_device_memory(
1161 GuestAddress(gpu_addr),
1162 gpu_mmap,
1163 /* read_only = */ false,
1164 /* log_dirty_pages = */ false,
1165 )
1166 .map_err(Error::AddGpuDeviceMemory)?;
1167 (host, linux)
1168 };
1169
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001170 run_control(
1171 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08001172 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001173 control_sockets,
1174 balloon_host_socket,
1175 &disk_host_sockets,
1176 sigchld_fd,
Lepton Wu60893882018-11-21 11:06:18 -08001177 _render_node_host,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001178 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001179}
1180
Zach Reizner55a9e502018-10-03 10:22:32 -07001181fn run_control(
1182 mut linux: RunnableLinuxVm,
Zach Reiznera60744b2019-02-13 17:33:32 -08001183 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
1184 mut control_sockets: Vec<MsgSocket<VmResponse, VmRequest>>,
1185 balloon_host_socket: UnixSeqpacket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001186 disk_host_sockets: &[MsgSocket<VmRequest, VmResponse>],
Zach Reizner55a9e502018-10-03 10:22:32 -07001187 sigchld_fd: SignalFd,
Lepton Wu60893882018-11-21 11:06:18 -08001188 _render_node_host: RenderNodeHost,
Zach Reizner55a9e502018-10-03 10:22:32 -07001189) -> Result<()> {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001190 // Paths to get the currently available memory and the low memory threshold.
David Tolnay5bbbf612018-12-01 17:49:30 -08001191 const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
1192 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001193
1194 // The amount of additional memory to claim back from the VM whenever the system is
1195 // low on memory.
1196 const ONE_GB: u64 = (1 << 30);
1197
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001198 let max_balloon_memory = match linux.vm.get_memory().memory_size() {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001199 // If the VM has at least 1.5 GB, the balloon driver can consume all but the last 1 GB.
1200 n if n >= (ONE_GB / 2) * 3 => n - ONE_GB,
1201 // Otherwise, if the VM has at least 500MB the balloon driver will consume at most
1202 // half of it.
1203 n if n >= (ONE_GB / 2) => n / 2,
1204 // Otherwise, the VM is too small for us to take memory away from it.
1205 _ => 0,
1206 };
1207 let mut current_balloon_memory: u64 = 0;
1208 let balloon_memory_increment: u64 = max_balloon_memory / 16;
1209
Zach Reizner5bed0d22018-03-28 02:31:11 -07001210 #[derive(PollToken)]
1211 enum Token {
1212 Exit,
1213 Stdin,
1214 ChildSignal,
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001215 CheckAvailableMemory,
1216 LowMemory,
1217 LowmemTimer,
Zach Reiznera60744b2019-02-13 17:33:32 -08001218 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001219 VmControl { index: usize },
1220 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001221
1222 let stdin_handle = stdin();
1223 let stdin_lock = stdin_handle.lock();
1224 stdin_lock
1225 .set_raw_mode()
1226 .expect("failed to set terminal raw mode");
1227
Zach Reizner5bed0d22018-03-28 02:31:11 -07001228 let poll_ctx = PollContext::new().map_err(Error::CreatePollContext)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001229 poll_ctx
1230 .add(&linux.exit_evt, Token::Exit)
1231 .map_err(Error::PollContextAdd)?;
Zach Reizner5bed0d22018-03-28 02:31:11 -07001232 if let Err(e) = poll_ctx.add(&stdin_handle, Token::Stdin) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001233 warn!("failed to add stdin to poll context: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001234 }
Zach Reizner55a9e502018-10-03 10:22:32 -07001235 poll_ctx
1236 .add(&sigchld_fd, Token::ChildSignal)
1237 .map_err(Error::PollContextAdd)?;
Zach Reiznera60744b2019-02-13 17:33:32 -08001238
1239 if let Some(socket_server) = &control_server_socket {
1240 poll_ctx
1241 .add(socket_server, Token::VmControlServer)
1242 .map_err(Error::PollContextAdd)?;
1243 }
Dylan Reid059a1882018-07-23 17:58:09 -07001244 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001245 poll_ctx
1246 .add(socket.as_ref(), Token::VmControl { index })
1247 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001248 }
1249
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001250 // Watch for low memory notifications and take memory back from the VM.
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001251 let low_mem = File::open("/dev/chromeos-low-mem").ok();
1252 if let Some(ref low_mem) = low_mem {
Zach Reizner55a9e502018-10-03 10:22:32 -07001253 poll_ctx
1254 .add(low_mem, Token::LowMemory)
1255 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001256 } else {
1257 warn!("Unable to open low mem indicator, maybe not a chrome os kernel");
1258 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001259
1260 // Used to rate limit balloon requests.
1261 let mut lowmem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001262 poll_ctx
1263 .add(&lowmem_timer, Token::LowmemTimer)
1264 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001265
1266 // Used to check whether it's ok to start giving memory back to the VM.
1267 let mut freemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001268 poll_ctx
1269 .add(&freemem_timer, Token::CheckAvailableMemory)
1270 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001271
1272 // Used to add jitter to timer values so that we don't have a thundering herd problem when
1273 // multiple VMs are running.
Daniel Prilik22006042019-01-14 14:19:04 -08001274 let mut simple_rng = SimpleRng::new(
1275 SystemTime::now()
1276 .duration_since(UNIX_EPOCH)
1277 .expect("time went backwards")
1278 .subsec_nanos() as u64,
1279 );
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001280
Dmitry Torokhov71006072019-03-06 10:56:51 -08001281 // Before starting VCPUs, in case we started with some capabilities, drop them all.
1282 drop_capabilities().map_err(Error::DropCapabilities)?;
1283
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001284 let mut vcpu_handles = Vec::with_capacity(linux.vcpus.len());
1285 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpus.len() + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001286 let run_mode_arc = Arc::new(VcpuRunMode::default());
Dylan Reid059a1882018-07-23 17:58:09 -07001287 setup_vcpu_signal_handler()?;
1288 for (cpu_id, vcpu) in linux.vcpus.into_iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001289 let handle = run_vcpu(
1290 vcpu,
1291 cpu_id as u32,
1292 vcpu_thread_barrier.clone(),
1293 linux.io_bus.clone(),
1294 linux.mmio_bus.clone(),
1295 linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Zach Reizner795355a2019-01-16 17:37:57 -08001296 linux.vm.check_extension(Cap::KvmclockCtrl),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001297 run_mode_arc.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001298 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07001299 vcpu_handles.push(handle);
1300 }
1301 vcpu_thread_barrier.wait();
1302
Zach Reizner39aa26b2017-12-12 18:03:23 -08001303 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001304 let events = {
1305 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001306 Ok(v) => v,
1307 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001308 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001309 break;
1310 }
1311 }
1312 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001313
1314 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07001315 for event in events.iter_readable() {
1316 match event.token() {
1317 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001318 info!("vcpu requested shutdown");
1319 break 'poll;
1320 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001321 Token::Stdin => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001322 let mut out = [0u8; 64];
1323 match stdin_lock.read_raw(&mut out[..]) {
1324 Ok(0) => {
1325 // Zero-length read indicates EOF. Remove from pollables.
Zach Reizner5bed0d22018-03-28 02:31:11 -07001326 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001327 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001328 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001329 warn!("error while reading stdin: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001330 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001331 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001332 Ok(count) => {
Zach Reizner55a9e502018-10-03 10:22:32 -07001333 linux
1334 .stdio_serial
Zach Reizner39aa26b2017-12-12 18:03:23 -08001335 .lock()
Zach Reizner39aa26b2017-12-12 18:03:23 -08001336 .queue_input_bytes(&out[..count])
1337 .expect("failed to queue bytes into serial port");
Zach Reizner55a9e502018-10-03 10:22:32 -07001338 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001339 }
1340 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001341 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001342 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08001343 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08001344 let pid = siginfo.ssi_pid;
1345 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1346 Some(label) => format!("{} (pid {})", label, pid),
1347 None => format!("pid {}", pid),
1348 };
David Tolnayf5032762018-12-03 10:46:45 -08001349 error!(
1350 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001351 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001352 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001353 }
David Tolnayf5032762018-12-03 10:46:45 -08001354 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001355 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001356 Token::CheckAvailableMemory => {
1357 // Acknowledge the timer.
1358 freemem_timer.wait().map_err(Error::TimerFd)?;
1359 if current_balloon_memory == 0 {
1360 // Nothing to see here.
1361 if let Err(e) = freemem_timer.clear() {
1362 warn!("unable to clear available memory check timer: {}", e);
1363 }
1364 continue;
1365 }
1366
1367 // Otherwise see if we can free up some memory.
1368 let margin = file_to_u64(LOWMEM_MARGIN).map_err(Error::ReadLowmemMargin)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001369 let available =
1370 file_to_u64(LOWMEM_AVAILABLE).map_err(Error::ReadLowmemAvailable)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001371
1372 // `available` and `margin` are specified in MB while `balloon_memory_increment` is in
1373 // bytes. So to correctly compare them we need to turn the increment value into MB.
Zach Reizner55a9e502018-10-03 10:22:32 -07001374 if available >= margin + 2 * (balloon_memory_increment >> 20) {
1375 current_balloon_memory =
1376 if current_balloon_memory >= balloon_memory_increment {
1377 current_balloon_memory - balloon_memory_increment
1378 } else {
1379 0
1380 };
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001381 let mut buf = [0u8; mem::size_of::<u64>()];
1382 LittleEndian::write_u64(&mut buf, current_balloon_memory);
Dylan Reid059a1882018-07-23 17:58:09 -07001383 if let Err(e) = balloon_host_socket.send(&buf) {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001384 warn!("failed to send memory value to balloon device: {}", e);
1385 }
1386 }
1387 }
1388 Token::LowMemory => {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001389 if let Some(ref low_mem) = low_mem {
1390 let old_balloon_memory = current_balloon_memory;
Zach Reizner55a9e502018-10-03 10:22:32 -07001391 current_balloon_memory = min(
1392 current_balloon_memory + balloon_memory_increment,
1393 max_balloon_memory,
1394 );
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001395 if current_balloon_memory != old_balloon_memory {
1396 let mut buf = [0u8; mem::size_of::<u64>()];
1397 LittleEndian::write_u64(&mut buf, current_balloon_memory);
Dylan Reid059a1882018-07-23 17:58:09 -07001398 if let Err(e) = balloon_host_socket.send(&buf) {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001399 warn!("failed to send memory value to balloon device: {}", e);
1400 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001401 }
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001402
1403 // Stop polling the lowmem device until the timer fires.
1404 poll_ctx.delete(low_mem).map_err(Error::PollContextDelete)?;
1405
1406 // Add some jitter to the timer so that if there are multiple VMs running
1407 // they don't all start ballooning at exactly the same time.
Daniel Prilik22006042019-01-14 14:19:04 -08001408 let lowmem_dur = Duration::from_millis(1000 + simple_rng.rng() % 200);
Zach Reizner55a9e502018-10-03 10:22:32 -07001409 lowmem_timer
1410 .reset(lowmem_dur, None)
1411 .map_err(Error::ResetTimerFd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001412
1413 // Also start a timer to check when we can start giving memory back. Do the
1414 // first check after a minute (with jitter) and subsequent checks after
1415 // every 30 seconds (with jitter).
Daniel Prilik22006042019-01-14 14:19:04 -08001416 let freemem_dur = Duration::from_secs(60 + simple_rng.rng() % 12);
1417 let freemem_int = Duration::from_secs(30 + simple_rng.rng() % 6);
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001418 freemem_timer
1419 .reset(freemem_dur, Some(freemem_int))
1420 .map_err(Error::ResetTimerFd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001421 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001422 }
1423 Token::LowmemTimer => {
1424 // Acknowledge the timer.
1425 lowmem_timer.wait().map_err(Error::TimerFd)?;
1426
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001427 if let Some(ref low_mem) = low_mem {
1428 // Start polling the lowmem device again.
Zach Reizner55a9e502018-10-03 10:22:32 -07001429 poll_ctx
1430 .add(low_mem, Token::LowMemory)
1431 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001432 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001433 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001434 Token::VmControlServer => {
1435 if let Some(socket_server) = &control_server_socket {
1436 match socket_server.accept() {
1437 Ok(socket) => {
1438 poll_ctx
1439 .add(
1440 &socket,
1441 Token::VmControl {
1442 index: control_sockets.len(),
1443 },
1444 )
1445 .map_err(Error::PollContextAdd)?;
1446 control_sockets.push(MsgSocket::new(socket));
1447 }
1448 Err(e) => error!("failed to accept socket: {}", e),
1449 }
1450 }
1451 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001452 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001453 if let Some(socket) = control_sockets.get(index) {
Jingkui Wange13b1802018-10-03 13:04:47 -07001454 match socket.recv() {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001455 Ok(request) => {
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001456 let mut run_mode_opt = None;
Zach Reizner55a9e502018-10-03 10:22:32 -07001457 let response = request.execute(
1458 &mut linux.vm,
1459 &mut linux.resources,
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001460 &mut run_mode_opt,
Zach Reizner55a9e502018-10-03 10:22:32 -07001461 &balloon_host_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001462 disk_host_sockets,
Zach Reizner55a9e502018-10-03 10:22:32 -07001463 );
Jingkui Wange13b1802018-10-03 13:04:47 -07001464 if let Err(e) = socket.send(&response) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001465 error!("failed to send VmResponse: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001466 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001467 if let Some(run_mode) = run_mode_opt {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001468 info!("control socket changed run mode to {}", run_mode);
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001469 match run_mode {
1470 VmRunMode::Exiting => {
1471 break 'poll;
1472 }
1473 other => {
1474 run_mode_arc.set_and_notify(other);
1475 for handle in &vcpu_handles {
1476 let _ = handle.kill(SIGRTMIN() + 0);
1477 }
1478 }
1479 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001480 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001481 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001482 Err(e) => {
1483 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1484 vm_control_indices_to_remove.push(index);
1485 } else {
1486 error!("failed to recv VmRequest: {}", e);
1487 }
1488 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001489 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001490 }
1491 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001492 }
1493 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001494
Zach Reizner5bed0d22018-03-28 02:31:11 -07001495 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08001496 match event.token() {
1497 Token::Exit => {}
1498 Token::Stdin => {
1499 let _ = poll_ctx.delete(&stdin_handle);
1500 }
1501 Token::ChildSignal => {}
1502 Token::CheckAvailableMemory => {}
1503 Token::LowMemory => {}
1504 Token::LowmemTimer => {}
1505 Token::VmControlServer => {}
1506 Token::VmControl { index } => {
1507 // It's possible more data is readable and buffered while the socket is hungup,
1508 // so don't delete the socket from the poll context until we're sure all the
1509 // data is read.
1510 match control_sockets.get(index).map(|s| s.get_readable_bytes()) {
1511 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
1512 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
1513 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07001514 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001515 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001516 }
1517 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001518
1519 // Sort in reverse so the highest indexes are removed first. This removal algorithm
1520 // preserves correct indexes as each element is removed.
1521 vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
1522 vm_control_indices_to_remove.dedup();
1523 for index in vm_control_indices_to_remove {
1524 control_sockets.swap_remove(index);
1525 if let Some(socket) = control_sockets.get(index) {
1526 poll_ctx
1527 .add(socket, Token::VmControl { index })
1528 .map_err(Error::PollContextAdd)?;
1529 }
1530 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001531 }
1532
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001533 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
1534 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07001535 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08001536 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001537 Ok(_) => {
1538 if let Err(e) = handle.join() {
1539 error!("failed to join vcpu thread: {:?}", e);
1540 }
1541 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08001542 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08001543 }
1544 }
1545
1546 stdin_lock
1547 .set_canon_mode()
1548 .expect("failed to restore canonical mode for terminal");
1549
1550 Ok(())
1551}