blob: 24ef5ecfc3806325fcd2d6b4e4fb923c4244a1e1 [file] [log] [blame]
Zach Reizner39aa26b2017-12-12 18:03:23 -08001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use std;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07006use std::cmp::min;
Jakub Starona3411ea2019-04-24 10:55:25 -07007use std::convert::TryFrom;
David Tolnayfdac5ed2019-03-08 16:56:14 -08008use std::error::Error as StdError;
Dylan Reid059a1882018-07-23 17:58:09 -07009use std::ffi::CStr;
David Tolnayc69f9752019-03-01 18:07:56 -080010use std::fmt::{self, Display};
Dylan Reid059a1882018-07-23 17:58:09 -070011use std::fs::{File, OpenOptions};
Zach Reizner55a9e502018-10-03 10:22:32 -070012use std::io::{self, stdin, Read};
Daniel Verkamp94c35272019-09-12 13:31:30 -070013use std::mem;
David Tolnay2b089fc2019-03-04 15:33:22 -080014use std::net::Ipv4Addr;
Daniel Verkamp6f9215c2019-08-20 09:41:22 -070015#[cfg(feature = "gpu")]
Zach Reizner0f2cfb02019-06-19 17:46:03 -070016use std::num::NonZeroU8;
Jakub Starond99cd0a2019-04-11 14:09:39 -070017use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
Zach Reiznera60744b2019-02-13 17:33:32 -080018use std::os::unix::net::UnixStream;
Zach Reizner39aa26b2017-12-12 18:03:23 -080019use std::path::{Path, PathBuf};
Chirantan Ekbote448516e2018-07-24 16:07:42 -070020use std::str;
Dylan Reid059a1882018-07-23 17:58:09 -070021use std::sync::{Arc, Barrier};
Zach Reizner39aa26b2017-12-12 18:03:23 -080022use std::thread;
23use std::thread::JoinHandle;
Daniel Prilik22006042019-01-14 14:19:04 -080024use std::time::{Duration, SystemTime, UNIX_EPOCH};
Zach Reizner39aa26b2017-12-12 18:03:23 -080025
David Tolnay41a6f842019-03-01 16:18:44 -080026use libc::{self, c_int, gid_t, uid_t};
Zach Reizner39aa26b2017-12-12 18:03:23 -080027
Dylan Reid3082e8e2019-01-07 10:33:48 -080028use audio_streams::DummyStreamSource;
David Tolnay2b089fc2019-03-04 15:33:22 -080029use devices::virtio::{self, VirtioDevice};
Xiong Zhang17b0daf2019-04-23 17:14:50 +080030use devices::{
31 self, HostBackendDeviceProvider, PciDevice, VfioDevice, VfioPciDevice, VirtioPciDevice,
32 XhciController,
33};
Zach Reizner39aa26b2017-12-12 18:03:23 -080034use io_jail::{self, Minijail};
Zach Reizner39aa26b2017-12-12 18:03:23 -080035use kvm::*;
paulhsiaf052cfe2019-01-22 15:22:25 +080036use libcras::CrasClient;
Zach Reiznera60744b2019-02-13 17:33:32 -080037use msg_socket::{MsgError, MsgReceiver, MsgSender, MsgSocket};
David Tolnay2b089fc2019-03-04 15:33:22 -080038use net_util::{Error as NetError, MacAddress, Tap};
Daniel Prilik22006042019-01-14 14:19:04 -080039use rand_ish::SimpleRng;
David Tolnay3df35522019-03-11 12:36:30 -070040use remain::sorted;
Xiong Zhang87a3b442019-10-29 17:32:44 +080041use resources::{Alloc, MmioType, SystemAllocator};
Zach Reizner6a8fdd92019-01-16 14:38:41 -080042use sync::{Condvar, Mutex};
Jakub Starond99cd0a2019-04-11 14:09:39 -070043use sys_util::net::{UnixSeqpacket, UnixSeqpacketListener, UnlinkUnixSeqpacketListener};
Jakub Starona3411ea2019-04-24 10:55:25 -070044
Zach Reiznera60744b2019-02-13 17:33:32 -080045use sys_util::{
David Tolnay633426a2019-04-12 12:18:35 -070046 self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
Fletcher Woodruff82ff3972019-10-02 13:11:34 -060047 get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
48 set_cpu_affinity, validate_raw_fd, warn, EventFd, FlockOperation, GuestAddress, GuestMemory,
49 Killable, MemoryMapping, PollContext, PollToken, Protection, SignalFd, Terminal, TimerFd,
50 WatchingEvents, SIGRTMIN,
Zach Reiznera60744b2019-02-13 17:33:32 -080051};
Jason D. Clinton865323d2017-09-27 22:04:03 -060052use vhost;
Jakub Starone7c59052019-04-09 12:31:14 -070053use vm_control::{
Jakub Staron1f828d72019-04-11 12:49:29 -070054 BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -070055 DiskControlCommand, DiskControlRequestSocket, DiskControlResponseSocket, DiskControlResult,
Xiong Zhanga5d248c2019-09-17 14:17:19 -070056 UsbControlSocket, VmControlResponseSocket, VmIrqRequest, VmIrqResponse, VmIrqResponseSocket,
57 VmMemoryControlRequestSocket, VmMemoryControlResponseSocket, VmMemoryRequest, VmMemoryResponse,
58 VmRunMode,
Jakub Starone7c59052019-04-09 12:31:14 -070059};
Zach Reizner39aa26b2017-12-12 18:03:23 -080060
Cody Schuffelen6d1ab502019-05-21 12:12:38 -070061use crate::{Config, DiskOption, Executable, TouchDeviceOption};
Zach Reizner39aa26b2017-12-12 18:03:23 -080062
Cody Schuffelen6d1ab502019-05-21 12:12:38 -070063use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents, VmImage};
Sonny Raoed517d12018-02-13 22:09:43 -080064
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080065#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
66use aarch64::AArch64 as Arch;
Zach Reizner55a9e502018-10-03 10:22:32 -070067#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
68use x86_64::X8664arch as Arch;
Zach Reizner39aa26b2017-12-12 18:03:23 -080069
Lepton Wu60893882018-11-21 11:06:18 -080070#[cfg(feature = "gpu-forward")]
David Tolnayaecf9a42019-04-11 14:30:00 -070071use render_node_forward::*;
/// Unit stand-in for `RenderNodeHost`, compiled only when the `gpu-forward` feature is disabled.
// NOTE(review): presumably the real type comes from `render_node_forward` when the feature is
// enabled — confirm against that crate.
#[cfg(not(feature = "gpu-forward"))]
type RenderNodeHost = ();
74
David Tolnay3df35522019-03-11 12:36:30 -070075#[sorted]
Dylan Reid059a1882018-07-23 17:58:09 -070076#[derive(Debug)]
Zach Reizner39aa26b2017-12-12 18:03:23 -080077pub enum Error {
Lepton Wu60893882018-11-21 11:06:18 -080078 AddGpuDeviceMemory(sys_util::Error),
Jakub Starona3411ea2019-04-24 10:55:25 -070079 AddPmemDeviceMemory(sys_util::Error),
Lepton Wu60893882018-11-21 11:06:18 -080080 AllocateGpuDeviceAddress,
Jakub Starona3411ea2019-04-24 10:55:25 -070081 AllocatePmemDeviceAddress(resources::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -080082 BalloonDeviceNew(virtio::BalloonError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080083 BlockDeviceNew(sys_util::Error),
Mark Ryan6ed5aea2018-04-20 13:52:35 +010084 BlockSignal(sys_util::signal::Error),
David Tolnaybe034262019-03-04 17:48:36 -080085 BuildVm(<Arch as LinuxArch>::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080086 ChownTpmStorage(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080087 CloneEventFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080088 CreateCrasClient(libcras::Error),
Cody Schuffelen7d533e52019-07-02 16:54:05 -070089 CreateDiskError(disk::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080090 CreateEventFd(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070091 CreatePollContext(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080092 CreateSignalFd(sys_util::SignalFdError),
93 CreateSocket(io::Error),
Chirantan Ekbote49fa08f2018-11-16 13:26:53 -080094 CreateTapDevice(NetError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -070095 CreateTimerFd(sys_util::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -080096 CreateTpmStorage(PathBuf, io::Error),
Jingkui Wang100e6e42019-03-08 20:41:57 -080097 CreateUsbProvider(devices::usb::host_backend::error::Error),
Xiong Zhang17b0daf2019-04-23 17:14:50 +080098 CreateVfioDevice(devices::vfio::VfioError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080099 DeviceJail(io_jail::Error),
100 DevicePivotRoot(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800101 Disk(io::Error),
Stephen Barberc79de2d2018-02-21 14:17:27 -0800102 DiskImageLock(sys_util::Error),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800103 DropCapabilities(sys_util::Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800104 InputDeviceNew(virtio::InputError),
105 InputEventsOpen(std::io::Error),
Dylan Reid20566442018-04-02 15:06:15 -0700106 InvalidFdPath,
Zach Reizner579bd2c2018-09-14 15:43:33 -0700107 InvalidWaylandPath,
David Tolnayfd0971d2019-03-04 17:15:57 -0800108 IoJail(io_jail::Error),
David Tolnayfdac5ed2019-03-08 16:56:14 -0800109 LoadKernel(Box<dyn StdError>),
David Tolnay2b089fc2019-03-04 15:33:22 -0800110 NetDeviceNew(virtio::NetError),
Tristan Muntsinger4133b012018-12-21 16:01:56 -0800111 OpenAndroidFstab(PathBuf, io::Error),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700112 OpenBios(PathBuf, io::Error),
Daniel Verkampe403f5c2018-12-11 16:29:26 -0800113 OpenInitrd(PathBuf, io::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800114 OpenKernel(PathBuf, io::Error),
David Tolnayfd0971d2019-03-04 17:15:57 -0800115 OpenVinput(PathBuf, io::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800116 P9DeviceNew(virtio::P9Error),
Lepton Wu39133a02019-02-27 12:42:29 -0800117 PivotRootDoesntExist(&'static str),
Jakub Starona3411ea2019-04-24 10:55:25 -0700118 PmemDeviceImageTooBig,
119 PmemDeviceNew(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -0700120 PollContextAdd(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700121 PollContextDelete(sys_util::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700122 ReadLowmemAvailable(io::Error),
123 ReadLowmemMargin(io::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700124 RegisterBalloon(arch::DeviceRegistrationError),
125 RegisterBlock(arch::DeviceRegistrationError),
126 RegisterGpu(arch::DeviceRegistrationError),
127 RegisterNet(arch::DeviceRegistrationError),
128 RegisterP9(arch::DeviceRegistrationError),
129 RegisterRng(arch::DeviceRegistrationError),
Mark Ryan6ed5aea2018-04-20 13:52:35 +0100130 RegisterSignalHandler(sys_util::Error),
Dylan Reid0f579cb2018-07-09 15:39:34 -0700131 RegisterWayland(arch::DeviceRegistrationError),
Lepton Wu60893882018-11-21 11:06:18 -0800132 ReserveGpuMemory(sys_util::MmapError),
133 ReserveMemory(sys_util::Error),
Jakub Starona3411ea2019-04-24 10:55:25 -0700134 ReservePmemMemory(sys_util::MmapError),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700135 ResetTimerFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800136 RngDeviceNew(virtio::RngError),
Zach Reizner8fb52112017-12-13 16:04:39 -0800137 SettingGidMap(io_jail::Error),
138 SettingUidMap(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800139 SignalFd(sys_util::SignalFdError),
140 SpawnVcpu(io::Error),
Chirantan Ekbote448516e2018-07-24 16:07:42 -0700141 TimerFd(sys_util::Error),
Chirantan Ekbote2d292332018-11-16 11:35:24 -0800142 ValidateRawFd(sys_util::Error),
David Tolnay2b089fc2019-03-04 15:33:22 -0800143 VhostNetDeviceNew(virtio::vhost::Error),
144 VhostVsockDeviceNew(virtio::vhost::Error),
Daniel Verkamp56f283b2018-10-05 11:40:59 -0700145 VirtioPciDev(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -0800146 WaylandDeviceNew(sys_util::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800147}
148
David Tolnayc69f9752019-03-01 18:07:56 -0800149impl Display for Error {
David Tolnay3df35522019-03-11 12:36:30 -0700150 #[remain::check]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800151 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayc69f9752019-03-01 18:07:56 -0800152 use self::Error::*;
153
David Tolnay3df35522019-03-11 12:36:30 -0700154 #[sorted]
Zach Reizner39aa26b2017-12-12 18:03:23 -0800155 match self {
Lepton Wu60893882018-11-21 11:06:18 -0800156 AddGpuDeviceMemory(e) => write!(f, "failed to add gpu device memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700157 AddPmemDeviceMemory(e) => write!(f, "failed to add pmem device memory: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800158 AllocateGpuDeviceAddress => write!(f, "failed to allocate gpu device guest address"),
Jakub Starona3411ea2019-04-24 10:55:25 -0700159 AllocatePmemDeviceAddress(e) => {
160 write!(f, "failed to allocate memory for pmem device: {}", e)
161 }
David Tolnayc69f9752019-03-01 18:07:56 -0800162 BalloonDeviceNew(e) => write!(f, "failed to create balloon: {}", e),
163 BlockDeviceNew(e) => write!(f, "failed to create block device: {}", e),
164 BlockSignal(e) => write!(f, "failed to block signal: {}", e),
David Tolnaybe034262019-03-04 17:48:36 -0800165 BuildVm(e) => write!(f, "The architecture failed to build the vm: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800166 ChownTpmStorage(e) => write!(f, "failed to chown tpm storage: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800167 CloneEventFd(e) => write!(f, "failed to clone eventfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800168 CreateCrasClient(e) => write!(f, "failed to create cras client: {}", e),
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700169 CreateDiskError(e) => write!(f, "failed to create virtual disk: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800170 CreateEventFd(e) => write!(f, "failed to create eventfd: {}", e),
171 CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
172 CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
173 CreateSocket(e) => write!(f, "failed to create socket: {}", e),
174 CreateTapDevice(e) => write!(f, "failed to create tap device: {}", e),
175 CreateTimerFd(e) => write!(f, "failed to create timerfd: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800176 CreateTpmStorage(p, e) => {
177 write!(f, "failed to create tpm storage dir {}: {}", p.display(), e)
178 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800179 CreateUsbProvider(e) => write!(f, "failed to create usb provider: {}", e),
Xiong Zhang17b0daf2019-04-23 17:14:50 +0800180 CreateVfioDevice(e) => write!(f, "Failed to create vfio device {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800181 DeviceJail(e) => write!(f, "failed to jail device: {}", e),
182 DevicePivotRoot(e) => write!(f, "failed to pivot root device: {}", e),
183 Disk(e) => write!(f, "failed to load disk image: {}", e),
184 DiskImageLock(e) => write!(f, "failed to lock disk image: {}", e),
Dmitry Torokhov71006072019-03-06 10:56:51 -0800185 DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
David Tolnay64cd5ea2019-04-15 15:56:35 -0700186 InputDeviceNew(e) => write!(f, "failed to set up input device: {}", e),
187 InputEventsOpen(e) => write!(f, "failed to open event device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800188 InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"),
189 InvalidWaylandPath => write!(f, "wayland socket path has no parent or file name"),
David Tolnayfd0971d2019-03-04 17:15:57 -0800190 IoJail(e) => write!(f, "{}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800191 LoadKernel(e) => write!(f, "failed to load kernel: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800192 NetDeviceNew(e) => write!(f, "failed to set up virtio networking: {}", e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800193 OpenAndroidFstab(p, e) => write!(
David Tolnayb4bd00f2019-02-12 17:51:26 -0800194 f,
195 "failed to open android fstab file {}: {}",
196 p.display(),
197 e
198 ),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -0700199 OpenBios(p, e) => write!(f, "failed to open bios {}: {}", p.display(), e),
David Tolnay3df35522019-03-11 12:36:30 -0700200 OpenInitrd(p, e) => write!(f, "failed to open initrd {}: {}", p.display(), e),
201 OpenKernel(p, e) => write!(f, "failed to open kernel image {}: {}", p.display(), e),
David Tolnayfd0971d2019-03-04 17:15:57 -0800202 OpenVinput(p, e) => write!(f, "failed to open vinput device {}: {}", p.display(), e),
David Tolnayc69f9752019-03-01 18:07:56 -0800203 P9DeviceNew(e) => write!(f, "failed to create 9p device: {}", e),
Lepton Wu39133a02019-02-27 12:42:29 -0800204 PivotRootDoesntExist(p) => write!(f, "{} doesn't exist, can't jail devices.", p),
Jakub Starona3411ea2019-04-24 10:55:25 -0700205 PmemDeviceImageTooBig => {
206 write!(f, "failed to create pmem device: pmem device image too big")
207 }
208 PmemDeviceNew(e) => write!(f, "failed to create pmem device: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800209 PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
210 PollContextDelete(e) => write!(f, "failed to remove fd from poll context: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800211 ReadLowmemAvailable(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700212 f,
213 "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}",
214 e
215 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800216 ReadLowmemMargin(e) => write!(
Zach Reizner55a9e502018-10-03 10:22:32 -0700217 f,
218 "failed to read /sys/kernel/mm/chromeos-low_mem/margin: {}",
219 e
220 ),
David Tolnayc69f9752019-03-01 18:07:56 -0800221 RegisterBalloon(e) => write!(f, "error registering balloon device: {}", e),
222 RegisterBlock(e) => write!(f, "error registering block device: {}", e),
223 RegisterGpu(e) => write!(f, "error registering gpu device: {}", e),
224 RegisterNet(e) => write!(f, "error registering net device: {}", e),
225 RegisterP9(e) => write!(f, "error registering 9p device: {}", e),
226 RegisterRng(e) => write!(f, "error registering rng device: {}", e),
227 RegisterSignalHandler(e) => write!(f, "error registering signal handler: {}", e),
228 RegisterWayland(e) => write!(f, "error registering wayland device: {}", e),
Lepton Wu60893882018-11-21 11:06:18 -0800229 ReserveGpuMemory(e) => write!(f, "failed to reserve gpu memory: {}", e),
230 ReserveMemory(e) => write!(f, "failed to reserve memory: {}", e),
Jakub Starona3411ea2019-04-24 10:55:25 -0700231 ReservePmemMemory(e) => write!(f, "failed to reserve pmem memory: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800232 ResetTimerFd(e) => write!(f, "failed to reset timerfd: {}", e),
233 RngDeviceNew(e) => write!(f, "failed to set up rng: {}", e),
David Tolnayc69f9752019-03-01 18:07:56 -0800234 SettingGidMap(e) => write!(f, "error setting GID map: {}", e),
235 SettingUidMap(e) => write!(f, "error setting UID map: {}", e),
236 SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
237 SpawnVcpu(e) => write!(f, "failed to spawn VCPU thread: {}", e),
238 TimerFd(e) => write!(f, "failed to read timer fd: {}", e),
239 ValidateRawFd(e) => write!(f, "failed to validate raw fd: {}", e),
240 VhostNetDeviceNew(e) => write!(f, "failed to set up vhost networking: {}", e),
241 VhostVsockDeviceNew(e) => write!(f, "failed to set up virtual socket device: {}", e),
242 VirtioPciDev(e) => write!(f, "failed to create virtio pci dev: {}", e),
243 WaylandDeviceNew(e) => write!(f, "failed to create wayland device: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800244 }
245 }
246}
247
David Tolnayfd0971d2019-03-04 17:15:57 -0800248impl From<io_jail::Error> for Error {
249 fn from(err: io_jail::Error) -> Self {
250 Error::IoJail(err)
251 }
252}
253
David Tolnayc69f9752019-03-01 18:07:56 -0800254impl std::error::Error for Error {}
Dylan Reid059a1882018-07-23 17:58:09 -0700255
Zach Reizner39aa26b2017-12-12 18:03:23 -0800256type Result<T> = std::result::Result<T, Error>;
257
Jakub Starond99cd0a2019-04-11 14:09:39 -0700258enum TaggedControlSocket {
259 Vm(VmControlResponseSocket),
Gurchetan Singh53edb812019-05-22 08:57:16 -0700260 VmMemory(VmMemoryControlResponseSocket),
Xiong Zhang2515b752019-09-19 10:29:02 +0800261 VmIrq(VmIrqResponseSocket),
Jakub Starond99cd0a2019-04-11 14:09:39 -0700262}
263
264impl AsRef<UnixSeqpacket> for TaggedControlSocket {
265 fn as_ref(&self) -> &UnixSeqpacket {
266 use self::TaggedControlSocket::*;
267 match &self {
268 Vm(ref socket) => socket,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700269 VmMemory(ref socket) => socket,
Xiong Zhang2515b752019-09-19 10:29:02 +0800270 VmIrq(ref socket) => socket,
Jakub Starond99cd0a2019-04-11 14:09:39 -0700271 }
272 }
273}
274
275impl AsRawFd for TaggedControlSocket {
276 fn as_raw_fd(&self) -> RawFd {
277 self.as_ref().as_raw_fd()
278 }
279}
280
Zach Reizner44863792019-06-26 14:22:08 -0700281fn create_base_minijail(
282 root: &Path,
283 log_failures: bool,
284 seccomp_policy: &Path,
285) -> Result<Minijail> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800286 // All child jails run in a new user namespace without any users mapped,
287 // they run as nobody unless otherwise configured.
David Tolnay5bbbf612018-12-01 17:49:30 -0800288 let mut j = Minijail::new().map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800289 j.namespace_pids();
290 j.namespace_user();
291 j.namespace_user_disable_setgroups();
292 // Don't need any capabilities.
293 j.use_caps(0);
294 // Create a new mount namespace with an empty root FS.
295 j.namespace_vfs();
David Tolnay5bbbf612018-12-01 17:49:30 -0800296 j.enter_pivot_root(root).map_err(Error::DevicePivotRoot)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800297 // Run in an empty network namespace.
298 j.namespace_net();
299 // Apply the block device seccomp policy.
300 j.no_new_privs();
Stephen Barber3b1d8a52018-01-06 17:34:51 -0800301 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
302 // the entire device process if a worker thread commits a seccomp violation.
303 j.set_seccomp_filter_tsync();
Zach Reizner44863792019-06-26 14:22:08 -0700304 if log_failures {
305 j.log_seccomp_filter_failures();
306 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800307 j.parse_seccomp_filters(seccomp_policy)
David Tolnay5bbbf612018-12-01 17:49:30 -0800308 .map_err(Error::DeviceJail)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800309 j.use_seccomp_filter();
310 // Don't do init setup.
311 j.run_as_init();
312 Ok(j)
313}
314
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800315fn simple_jail(cfg: &Config, policy: &str) -> Result<Option<Minijail>> {
Lepton Wu9105e9f2019-03-14 11:38:31 -0700316 if cfg.sandbox {
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800317 let pivot_root: &str = option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty");
318 // A directory for a jailed device's pivot root.
319 let root_path = Path::new(pivot_root);
320 if !root_path.exists() {
321 return Err(Error::PivotRootDoesntExist(pivot_root));
322 }
323 let policy_path: PathBuf = cfg.seccomp_policy_dir.join(policy);
Zach Reizner44863792019-06-26 14:22:08 -0700324 Ok(Some(create_base_minijail(
325 root_path,
326 cfg.seccomp_log_failures,
327 &policy_path,
328 )?))
Jianxun Zhang8f4d7682019-02-21 12:55:31 -0800329 } else {
330 Ok(None)
331 }
332}
333
David Tolnayfd0971d2019-03-04 17:15:57 -0800334type DeviceResult<T = VirtioDeviceStub> = std::result::Result<T, Error>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800335
336fn create_block_device(
337 cfg: &Config,
338 disk: &DiskOption,
Jakub Staronecf81e02019-04-11 11:43:39 -0700339 disk_device_socket: DiskControlResponseSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800340) -> DeviceResult {
341 // Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
342 let raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) {
343 // Safe because we will validate |raw_fd|.
344 unsafe { File::from_raw_fd(raw_fd_from_path(&disk.path)?) }
345 } else {
346 OpenOptions::new()
347 .read(true)
348 .write(!disk.read_only)
349 .open(&disk.path)
350 .map_err(Error::Disk)?
351 };
352 // Lock the disk image to prevent other crosvm instances from using it.
353 let lock_op = if disk.read_only {
354 FlockOperation::LockShared
355 } else {
356 FlockOperation::LockExclusive
357 };
358 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
359
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700360 let disk_file = disk::create_disk_file(raw_image).map_err(Error::CreateDiskError)?;
361 let dev = virtio::Block::new(disk_file, disk.read_only, Some(disk_device_socket))
362 .map_err(Error::BlockDeviceNew)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800363
364 Ok(VirtioDeviceStub {
Cody Schuffelen7d533e52019-07-02 16:54:05 -0700365 dev: Box::new(dev),
David Tolnay2b089fc2019-03-04 15:33:22 -0800366 jail: simple_jail(&cfg, "block_device.policy")?,
367 })
368}
369
370fn create_rng_device(cfg: &Config) -> DeviceResult {
371 let dev = virtio::Rng::new().map_err(Error::RngDeviceNew)?;
372
373 Ok(VirtioDeviceStub {
374 dev: Box::new(dev),
375 jail: simple_jail(&cfg, "rng_device.policy")?,
376 })
377}
378
/// Creates the virtio tpm device and the storage directory it uses.
///
/// When sandboxed, storage lives in `/run/vm/tpm.<pid>`: the directory is created, chowned to
/// the ids returned by `add_crosvm_user_to_jail`, and bind-mounted into the jail. Without a
/// jail the storage path is `/tmp/tpm-simulator`, which this function does not create.
#[cfg(feature = "tpm")]
fn create_tpm_device(cfg: &Config) -> DeviceResult {
    use std::ffi::CString;
    use std::fs;
    use std::process;
    use sys_util::chown;

    let mut tpm_jail = simple_jail(cfg, "tpm_device.policy")?;

    let tpm_storage: PathBuf = match tpm_jail.as_mut() {
        Some(jail) => {
            // Create a tmpfs in the device's root directory for tpm
            // simulator storage. The size is 20*1024, or 20 KB.
            jail.mount_with_data(
                Path::new("none"),
                Path::new("/"),
                "tmpfs",
                (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
                "size=20480",
            )?;

            let crosvm_ids = add_crosvm_user_to_jail(jail, "tpm")?;

            // The directory name includes this process's pid.
            let tpm_pid_dir = format!("/run/vm/tpm.{}", process::id());
            let storage = Path::new(&tpm_pid_dir).to_path_buf();
            fs::create_dir_all(&storage)
                .map_err(|e| Error::CreateTpmStorage(storage.clone(), e))?;

            let tpm_pid_dir_c = CString::new(tpm_pid_dir).expect("no nul bytes");
            chown(&tpm_pid_dir_c, crosvm_ids.uid, crosvm_ids.gid)
                .map_err(Error::ChownTpmStorage)?;

            jail.mount_bind(&storage, &storage, true)?;
            storage
        }
        // Path used inside cros_sdk which does not have /run/vm.
        None => Path::new("/tmp/tpm-simulator").to_path_buf(),
    };

    let tpm = virtio::Tpm::new(tpm_storage);

    Ok(VirtioDeviceStub {
        dev: Box::new(tpm),
        jail: tpm_jail,
    })
}
427
Jorge E. Moreira99d3f082019-03-07 10:59:54 -0800428fn create_single_touch_device(cfg: &Config, single_touch_spec: &TouchDeviceOption) -> DeviceResult {
429 let socket = create_input_socket(&single_touch_spec.path).map_err(|e| {
430 error!("failed configuring virtio single touch: {:?}", e);
431 e
432 })?;
433
434 let dev = virtio::new_single_touch(socket, single_touch_spec.width, single_touch_spec.height)
435 .map_err(Error::InputDeviceNew)?;
436 Ok(VirtioDeviceStub {
437 dev: Box::new(dev),
438 jail: simple_jail(&cfg, "input_device.policy")?,
439 })
440}
441
442fn create_trackpad_device(cfg: &Config, trackpad_spec: &TouchDeviceOption) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800443 let socket = create_input_socket(&trackpad_spec.path).map_err(|e| {
444 error!("failed configuring virtio trackpad: {}", e);
445 e
446 })?;
447
448 let dev = virtio::new_trackpad(socket, trackpad_spec.width, trackpad_spec.height)
449 .map_err(Error::InputDeviceNew)?;
450
451 Ok(VirtioDeviceStub {
452 dev: Box::new(dev),
453 jail: simple_jail(&cfg, "input_device.policy")?,
454 })
455}
456
457fn create_mouse_device(cfg: &Config, mouse_socket: &Path) -> DeviceResult {
458 let socket = create_input_socket(&mouse_socket).map_err(|e| {
459 error!("failed configuring virtio mouse: {}", e);
460 e
461 })?;
462
463 let dev = virtio::new_mouse(socket).map_err(Error::InputDeviceNew)?;
464
465 Ok(VirtioDeviceStub {
466 dev: Box::new(dev),
467 jail: simple_jail(&cfg, "input_device.policy")?,
468 })
469}
470
471fn create_keyboard_device(cfg: &Config, keyboard_socket: &Path) -> DeviceResult {
472 let socket = create_input_socket(&keyboard_socket).map_err(|e| {
473 error!("failed configuring virtio keyboard: {}", e);
474 e
475 })?;
476
477 let dev = virtio::new_keyboard(socket).map_err(Error::InputDeviceNew)?;
478
479 Ok(VirtioDeviceStub {
480 dev: Box::new(dev),
481 jail: simple_jail(&cfg, "input_device.policy")?,
482 })
483}
484
485fn create_vinput_device(cfg: &Config, dev_path: &Path) -> DeviceResult {
486 let dev_file = OpenOptions::new()
487 .read(true)
488 .write(true)
489 .open(dev_path)
David Tolnayfd0971d2019-03-04 17:15:57 -0800490 .map_err(|e| Error::OpenVinput(dev_path.to_owned(), e))?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800491
492 let dev = virtio::new_evdev(dev_file).map_err(Error::InputDeviceNew)?;
493
494 Ok(VirtioDeviceStub {
495 dev: Box::new(dev),
496 jail: simple_jail(&cfg, "input_device.policy")?,
497 })
498}
499
Jakub Staron1f828d72019-04-11 12:49:29 -0700500fn create_balloon_device(cfg: &Config, socket: BalloonControlResponseSocket) -> DeviceResult {
David Tolnay2b089fc2019-03-04 15:33:22 -0800501 let dev = virtio::Balloon::new(socket).map_err(Error::BalloonDeviceNew)?;
502
503 Ok(VirtioDeviceStub {
504 dev: Box::new(dev),
505 jail: simple_jail(&cfg, "balloon_device.policy")?,
506 })
507}
508
509fn create_tap_net_device(cfg: &Config, tap_fd: RawFd) -> DeviceResult {
510 // Safe because we ensure that we get a unique handle to the fd.
511 let tap = unsafe {
512 Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateRawFd)?)
513 .map_err(Error::CreateTapDevice)?
514 };
515
516 let dev = virtio::Net::from(tap).map_err(Error::NetDeviceNew)?;
517
518 Ok(VirtioDeviceStub {
519 dev: Box::new(dev),
520 jail: simple_jail(&cfg, "net_device.policy")?,
521 })
522}
523
524fn create_net_device(
525 cfg: &Config,
526 host_ip: Ipv4Addr,
527 netmask: Ipv4Addr,
528 mac_address: MacAddress,
529 mem: &GuestMemory,
530) -> DeviceResult {
531 let dev = if cfg.vhost_net {
532 let dev =
533 virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, netmask, mac_address, mem)
534 .map_err(Error::VhostNetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800535 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800536 } else {
537 let dev =
538 virtio::Net::<Tap>::new(host_ip, netmask, mac_address).map_err(Error::NetDeviceNew)?;
David Tolnayfdac5ed2019-03-08 16:56:14 -0800539 Box::new(dev) as Box<dyn VirtioDevice>
David Tolnay2b089fc2019-03-04 15:33:22 -0800540 };
541
542 let policy = if cfg.vhost_net {
543 "vhost_net_device.policy"
544 } else {
545 "net_device.policy"
546 };
547
548 Ok(VirtioDeviceStub {
549 dev,
550 jail: simple_jail(&cfg, policy)?,
551 })
552}
553
554#[cfg(feature = "gpu")]
555fn create_gpu_device(
556 cfg: &Config,
557 exit_evt: &EventFd,
Gurchetan Singh7ec58fa2019-05-15 15:30:38 -0700558 gpu_device_socket: VmMemoryControlRequestSocket,
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900559 gpu_sockets: Vec<virtio::resource_bridge::ResourceResponseSocket>,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700560 wayland_socket_path: Option<PathBuf>,
561 x_display: Option<String>,
David Tolnay2b089fc2019-03-04 15:33:22 -0800562) -> DeviceResult {
563 let jailed_wayland_path = Path::new("/wayland-0");
564
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700565 let mut display_backends = vec![
566 virtio::DisplayBackend::X(x_display),
567 virtio::DisplayBackend::Null,
568 ];
569
570 if let Some(socket_path) = wayland_socket_path.as_ref() {
571 display_backends.insert(
572 0,
573 virtio::DisplayBackend::Wayland(if cfg.sandbox {
574 Some(jailed_wayland_path.to_owned())
575 } else {
576 Some(socket_path.to_owned())
577 }),
578 );
579 }
580
David Tolnay2b089fc2019-03-04 15:33:22 -0800581 let dev = virtio::Gpu::new(
582 exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Gurchetan Singh7ec58fa2019-05-15 15:30:38 -0700583 Some(gpu_device_socket),
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700584 NonZeroU8::new(1).unwrap(), // number of scanouts
Chirantan Ekbotedd11d432019-06-11 21:50:46 +0900585 gpu_sockets,
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700586 display_backends,
David Tolnay2b089fc2019-03-04 15:33:22 -0800587 );
588
589 let jail = match simple_jail(&cfg, "gpu_device.policy")? {
590 Some(mut jail) => {
591 // Create a tmpfs in the device's root directory so that we can bind mount the
592 // dri directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
593 jail.mount_with_data(
594 Path::new("none"),
595 Path::new("/"),
596 "tmpfs",
597 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
598 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800599 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800600
601 // Device nodes required for DRM.
602 let sys_dev_char_path = Path::new("/sys/dev/char");
David Tolnayfd0971d2019-03-04 17:15:57 -0800603 jail.mount_bind(sys_dev_char_path, sys_dev_char_path, false)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800604 let sys_devices_path = Path::new("/sys/devices");
David Tolnayfd0971d2019-03-04 17:15:57 -0800605 jail.mount_bind(sys_devices_path, sys_devices_path, false)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800606 let drm_dri_path = Path::new("/dev/dri");
David Tolnayfd0971d2019-03-04 17:15:57 -0800607 jail.mount_bind(drm_dri_path, drm_dri_path, false)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800608
David Riley06787c52019-07-24 12:09:07 -0700609 // If the ARM specific devices exist on the host, bind mount them in.
610 let mali0_path = Path::new("/dev/mali0");
611 if mali0_path.exists() {
612 jail.mount_bind(mali0_path, mali0_path, true)?;
613 }
614
615 let pvr_sync_path = Path::new("/dev/pvr_sync");
616 if pvr_sync_path.exists() {
617 jail.mount_bind(pvr_sync_path, pvr_sync_path, true)?;
618 }
619
David Tolnay2b089fc2019-03-04 15:33:22 -0800620 // Libraries that are required when mesa drivers are dynamically loaded.
David Riley06787c52019-07-24 12:09:07 -0700621 let lib_dirs = &["/usr/lib", "/usr/lib64", "/lib", "/lib64"];
622 for dir in lib_dirs {
623 let dir_path = Path::new(dir);
624 if dir_path.exists() {
625 jail.mount_bind(dir_path, dir_path, false)?;
626 }
627 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800628
629 // Bind mount the wayland socket into jail's root. This is necessary since each
630 // new wayland context must open() the socket.
Zach Reizner0f2cfb02019-06-19 17:46:03 -0700631 if let Some(path) = wayland_socket_path {
632 jail.mount_bind(path.as_ref(), jailed_wayland_path, true)?;
633 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800634
635 add_crosvm_user_to_jail(&mut jail, "gpu")?;
636
David Riley54e660b2019-07-24 17:22:50 -0700637 // pvr driver requires read access to /proc/self/task/*/comm.
638 let proc_path = Path::new("/proc");
639 jail.mount(
640 proc_path,
641 proc_path,
642 "proc",
643 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize,
644 )?;
645
David Tolnay2b089fc2019-03-04 15:33:22 -0800646 Some(jail)
647 }
648 None => None,
649 };
650
651 Ok(VirtioDeviceStub {
652 dev: Box::new(dev),
653 jail,
654 })
655}
656
657fn create_wayland_device(
658 cfg: &Config,
659 socket_path: &Path,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700660 socket: VmMemoryControlRequestSocket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800661 resource_bridge: Option<virtio::resource_bridge::ResourceRequestSocket>,
662) -> DeviceResult {
663 let wayland_socket_dir = socket_path.parent().ok_or(Error::InvalidWaylandPath)?;
664 let wayland_socket_name = socket_path.file_name().ok_or(Error::InvalidWaylandPath)?;
665 let jailed_wayland_dir = Path::new("/wayland");
666 let jailed_wayland_path = jailed_wayland_dir.join(wayland_socket_name);
667
668 let dev = virtio::Wl::new(
Lepton Wu9105e9f2019-03-14 11:38:31 -0700669 if cfg.sandbox {
David Tolnay2b089fc2019-03-04 15:33:22 -0800670 &jailed_wayland_path
671 } else {
672 socket_path
673 },
674 socket,
675 resource_bridge,
676 )
677 .map_err(Error::WaylandDeviceNew)?;
678
679 let jail = match simple_jail(&cfg, "wl_device.policy")? {
680 Some(mut jail) => {
681 // Create a tmpfs in the device's root directory so that we can bind mount the wayland
682 // socket directory into it. The size=67108864 is size=64*1024*1024 or size=64MB.
683 jail.mount_with_data(
684 Path::new("none"),
685 Path::new("/"),
686 "tmpfs",
687 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
688 "size=67108864",
David Tolnayfd0971d2019-03-04 17:15:57 -0800689 )?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800690
691 // Bind mount the wayland socket's directory into jail's root. This is necessary since
692 // each new wayland context must open() the socket. If the wayland socket is ever
693 // destroyed and remade in the same host directory, new connections will be possible
694 // without restarting the wayland device.
David Tolnayfd0971d2019-03-04 17:15:57 -0800695 jail.mount_bind(wayland_socket_dir, jailed_wayland_dir, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800696
697 add_crosvm_user_to_jail(&mut jail, "Wayland")?;
698
699 Some(jail)
700 }
701 None => None,
702 };
703
704 Ok(VirtioDeviceStub {
705 dev: Box::new(dev),
706 jail,
707 })
708}
709
710fn create_vhost_vsock_device(cfg: &Config, cid: u64, mem: &GuestMemory) -> DeviceResult {
711 let dev = virtio::vhost::Vsock::new(cid, mem).map_err(Error::VhostVsockDeviceNew)?;
712
713 Ok(VirtioDeviceStub {
714 dev: Box::new(dev),
715 jail: simple_jail(&cfg, "vhost_vsock_device.policy")?,
716 })
717}
718
719fn create_9p_device(cfg: &Config, chronos: Ids, src: &Path, tag: &str) -> DeviceResult {
720 let (jail, root) = match simple_jail(&cfg, "9p_device.policy")? {
721 Some(mut jail) => {
722 // The shared directory becomes the root of the device's file system.
723 let root = Path::new("/");
David Tolnayfd0971d2019-03-04 17:15:57 -0800724 jail.mount_bind(src, root, true)?;
David Tolnay2b089fc2019-03-04 15:33:22 -0800725
726 // Set the uid/gid for the jailed process, and give a basic id map. This
727 // is required for the above bind mount to work.
728 jail.change_uid(chronos.uid);
729 jail.change_gid(chronos.gid);
730 jail.uidmap(&format!("{0} {0} 1", chronos.uid))
731 .map_err(Error::SettingUidMap)?;
732 jail.gidmap(&format!("{0} {0} 1", chronos.gid))
733 .map_err(Error::SettingGidMap)?;
734
735 (Some(jail), root)
736 }
737 None => {
738 // There's no bind mount so we tell the server to treat the source directory as the
David Tolnay9deb7d72019-03-05 18:25:44 -0800739 // root.
David Tolnay2b089fc2019-03-04 15:33:22 -0800740 (None, src)
741 }
742 };
743
744 let dev = virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?;
745
746 Ok(VirtioDeviceStub {
747 dev: Box::new(dev),
748 jail,
749 })
750}
751
Jakub Starona3411ea2019-04-24 10:55:25 -0700752fn create_pmem_device(
753 cfg: &Config,
754 vm: &mut Vm,
755 resources: &mut SystemAllocator,
756 disk: &DiskOption,
757 index: usize,
758) -> DeviceResult {
759 let fd = OpenOptions::new()
760 .read(true)
761 .write(!disk.read_only)
762 .open(&disk.path)
763 .map_err(Error::Disk)?;
764
765 let image_size = {
766 let metadata = std::fs::metadata(&disk.path).map_err(Error::Disk)?;
767 metadata.len()
768 };
769
770 let protection = {
771 if disk.read_only {
772 Protection::read()
773 } else {
774 Protection::read_write()
775 }
776 };
777
778 let memory_mapping = {
779 // Conversion from u64 to usize may fail on 32bit system.
780 let image_size = usize::try_from(image_size).map_err(|_| Error::PmemDeviceImageTooBig)?;
781
782 MemoryMapping::from_fd_offset_protection(&fd, image_size, 0, protection)
783 .map_err(Error::ReservePmemMemory)?
784 };
785
786 let mapping_address = resources
Xiong Zhang383b3b52019-10-30 14:59:26 +0800787 .mmio_allocator(MmioType::High)
Jakub Starona3411ea2019-04-24 10:55:25 -0700788 .allocate_with_align(
789 image_size,
790 Alloc::PmemDevice(index),
791 format!("pmem_disk_image_{}", index),
792 // Linux kernel requires pmem namespaces to be 128 MiB aligned.
793 128 * 1024 * 1024, /* 128 MiB */
794 )
795 .map_err(Error::AllocatePmemDeviceAddress)?;
796
Xiong Zhang383b3b52019-10-30 14:59:26 +0800797 vm.add_mmio_memory(
Jakub Starona3411ea2019-04-24 10:55:25 -0700798 GuestAddress(mapping_address),
799 memory_mapping,
800 /* read_only = */ disk.read_only,
801 /* log_dirty_pages = */ false,
802 )
803 .map_err(Error::AddPmemDeviceMemory)?;
804
805 let dev = virtio::Pmem::new(fd, GuestAddress(mapping_address), image_size)
806 .map_err(Error::PmemDeviceNew)?;
807
808 Ok(VirtioDeviceStub {
809 dev: Box::new(dev) as Box<dyn VirtioDevice>,
Jakub Staroncc91fc82019-06-10 14:00:07 -0700810 jail: simple_jail(&cfg, "pmem_device.policy")?,
Jakub Starona3411ea2019-04-24 10:55:25 -0700811 })
812}
813
// Builds the list of virtio device stubs (device plus optional jail) selected by `cfg`.
//
// Devices are appended in a fixed order: block devices, pmem devices, rng, tpm (feature "tpm"),
// single touch, trackpad, mouse, keyboard, evdev inputs, balloon, tap-fd net devices, the
// configured net device, wayland, gpu (feature "gpu"), vhost-vsock and finally the 9p shared
// directories.
//
// One socket is removed from the front of `disk_device_sockets` for every entry in `cfg.disks`;
// `Vec::remove` panics if there are fewer sockets than disks.
//
// gpu_device_socket is not used when GPU support is disabled.
#[cfg_attr(not(feature = "gpu"), allow(unused_variables))]
fn create_virtio_devices(
    cfg: &Config,
    mem: &GuestMemory,
    vm: &mut Vm,
    resources: &mut SystemAllocator,
    _exit_evt: &EventFd,
    wayland_device_socket: VmMemoryControlRequestSocket,
    gpu_device_socket: VmMemoryControlRequestSocket,
    balloon_device_socket: BalloonControlResponseSocket,
    disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // Each block device consumes the next control socket, in the same order as `cfg.disks`.
    for disk in &cfg.disks {
        let disk_device_socket = disk_device_sockets.remove(0);
        devs.push(create_block_device(cfg, disk, disk_device_socket)?);
    }

    // The enumeration index is passed through to `create_pmem_device`.
    for (index, pmem_disk) in cfg.pmem_devices.iter().enumerate() {
        devs.push(create_pmem_device(cfg, vm, resources, pmem_disk, index)?);
    }

    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "tpm")]
    {
        if cfg.software_tpm {
            devs.push(create_tpm_device(cfg)?);
        }
    }

    if let Some(single_touch_spec) = &cfg.virtio_single_touch {
        devs.push(create_single_touch_device(cfg, single_touch_spec)?);
    }

    if let Some(trackpad_spec) = &cfg.virtio_trackpad {
        devs.push(create_trackpad_device(cfg, trackpad_spec)?);
    }

    if let Some(mouse_socket) = &cfg.virtio_mouse {
        devs.push(create_mouse_device(cfg, mouse_socket)?);
    }

    if let Some(keyboard_socket) = &cfg.virtio_keyboard {
        devs.push(create_keyboard_device(cfg, keyboard_socket)?);
    }

    for dev_path in &cfg.virtio_input_evdevs {
        devs.push(create_vinput_device(cfg, dev_path)?);
    }

    devs.push(create_balloon_device(cfg, balloon_device_socket)?);

    // We checked above that if the IP is defined, then the netmask is, too.
    for tap_fd in &cfg.tap_fd {
        devs.push(create_tap_net_device(cfg, *tap_fd)?);
    }

    // The configured net device is only created when host IP, netmask and MAC are all present.
    if let (Some(host_ip), Some(netmask), Some(mac_address)) =
        (cfg.host_ip, cfg.netmask, cfg.mac_address)
    {
        devs.push(create_net_device(cfg, host_ip, netmask, mac_address, mem)?);
    }

    // GPU-side ends of the resource bridges; only filled in when the "gpu" feature is enabled
    // and `cfg.gpu` is set.
    #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
    let mut resource_bridges = Vec::<virtio::resource_bridge::ResourceResponseSocket>::new();

    if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() {
        #[cfg_attr(not(feature = "gpu"), allow(unused_mut))]
        let mut wl_resource_bridge = None::<virtio::resource_bridge::ResourceRequestSocket>;

        #[cfg(feature = "gpu")]
        {
            if cfg.gpu {
                // Connect the wayland and gpu devices with a resource bridge socket pair.
                let (wl_socket, gpu_socket) =
                    virtio::resource_bridge::pair().map_err(Error::CreateSocket)?;
                resource_bridges.push(gpu_socket);
                wl_resource_bridge = Some(wl_socket);
            }
        }

        devs.push(create_wayland_device(
            cfg,
            wayland_socket_path,
            wayland_device_socket,
            wl_resource_bridge,
        )?);
    }

    #[cfg(feature = "gpu")]
    {
        if cfg.gpu {
            devs.push(create_gpu_device(
                cfg,
                _exit_evt,
                gpu_device_socket,
                resource_bridges,
                cfg.wayland_socket_path.clone(),
                cfg.x_display.clone(),
            )?);
        }
    }

    if let Some(cid) = cfg.cid {
        devs.push(create_vhost_vsock_device(cfg, cid, mem)?);
    }

    // The same ids are used for every shared directory.
    let chronos = get_chronos_ids();

    for (src, tag) in &cfg.shared_dirs {
        devs.push(create_9p_device(cfg, chronos, src, tag)?);
    }

    Ok(devs)
}
931
932fn create_devices(
Trent Begin17ccaad2019-04-17 13:51:25 -0600933 cfg: &Config,
David Tolnay2b089fc2019-03-04 15:33:22 -0800934 mem: &GuestMemory,
Jakub Starona3411ea2019-04-24 10:55:25 -0700935 vm: &mut Vm,
936 resources: &mut SystemAllocator,
David Tolnay2b089fc2019-03-04 15:33:22 -0800937 exit_evt: &EventFd,
Xiong Zhanga5d248c2019-09-17 14:17:19 -0700938 control_sockets: &mut Vec<TaggedControlSocket>,
Gurchetan Singh53edb812019-05-22 08:57:16 -0700939 wayland_device_socket: VmMemoryControlRequestSocket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -0700940 gpu_device_socket: VmMemoryControlRequestSocket,
Jakub Staron1f828d72019-04-11 12:49:29 -0700941 balloon_device_socket: BalloonControlResponseSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -0700942 disk_device_sockets: &mut Vec<DiskControlResponseSocket>,
Jingkui Wang100e6e42019-03-08 20:41:57 -0800943 usb_provider: HostBackendDeviceProvider,
David Tolnayfdac5ed2019-03-08 16:56:14 -0800944) -> DeviceResult<Vec<(Box<dyn PciDevice>, Option<Minijail>)>> {
David Tolnay2b089fc2019-03-04 15:33:22 -0800945 let stubs = create_virtio_devices(
946 &cfg,
947 mem,
Jakub Starona3411ea2019-04-24 10:55:25 -0700948 vm,
949 resources,
David Tolnay2b089fc2019-03-04 15:33:22 -0800950 exit_evt,
951 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -0700952 gpu_device_socket,
David Tolnay2b089fc2019-03-04 15:33:22 -0800953 balloon_device_socket,
954 disk_device_sockets,
955 )?;
956
957 let mut pci_devices = Vec::new();
958
959 for stub in stubs {
Xiong Zhanga5d248c2019-09-17 14:17:19 -0700960 let dev = if stub.dev.msix_vectors() > 0 {
961 let (msi_host_socket, msi_device_socket) =
962 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
963 control_sockets.push(TaggedControlSocket::VmIrq(msi_host_socket));
964
965 VirtioPciDevice::new(mem.clone(), stub.dev, Some(msi_device_socket))
966 .map_err(Error::VirtioPciDev)?
967 } else {
968 VirtioPciDevice::new(mem.clone(), stub.dev, None).map_err(Error::VirtioPciDev)?
969 };
970
David Tolnayfdac5ed2019-03-08 16:56:14 -0800971 let dev = Box::new(dev) as Box<dyn PciDevice>;
David Tolnay2b089fc2019-03-04 15:33:22 -0800972 pci_devices.push((dev, stub.jail));
973 }
974
975 if cfg.cras_audio {
paulhsia580d4182019-05-24 16:53:55 +0800976 let mut server = Box::new(CrasClient::new().map_err(Error::CreateCrasClient)?);
977 if cfg.cras_capture {
978 server.enable_cras_capture();
979 }
David Tolnay2b089fc2019-03-04 15:33:22 -0800980 let cras_audio = devices::Ac97Dev::new(mem.clone(), server);
981
982 pci_devices.push((
983 Box::new(cras_audio),
984 simple_jail(&cfg, "cras_audio_device.policy")?,
985 ));
986 }
987
988 if cfg.null_audio {
989 let server = Box::new(DummyStreamSource::new());
990 let null_audio = devices::Ac97Dev::new(mem.clone(), server);
991
992 pci_devices.push((
993 Box::new(null_audio),
994 simple_jail(&cfg, "null_audio_device.policy")?,
995 ));
996 }
Jingkui Wang100e6e42019-03-08 20:41:57 -0800997 // Create xhci controller.
998 let usb_controller = Box::new(XhciController::new(mem.clone(), usb_provider));
999 pci_devices.push((usb_controller, simple_jail(&cfg, "xhci.policy")?));
David Tolnay2b089fc2019-03-04 15:33:22 -08001000
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001001 if cfg.vfio.is_some() {
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001002 let (vfio_host_socket_irq, vfio_device_socket_irq) =
1003 msg_socket::pair::<VmIrqResponse, VmIrqRequest>().map_err(Error::CreateSocket)?;
1004 control_sockets.push(TaggedControlSocket::VmIrq(vfio_host_socket_irq));
1005
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001006 let vfio_path = cfg.vfio.as_ref().unwrap().as_path();
Xiong Zhangc554fff2019-04-23 17:14:55 +08001007 let vfiodevice =
Daniel Verkamp04a82c72019-09-24 11:06:58 -07001008 VfioDevice::new(vfio_path, vm, mem.clone()).map_err(Error::CreateVfioDevice)?;
Xiong Zhang4b5bb3a2019-04-23 17:15:21 +08001009 let vfiopcidevice = Box::new(VfioPciDevice::new(vfiodevice, vfio_device_socket_irq));
Xiong Zhang17b0daf2019-04-23 17:14:50 +08001010 pci_devices.push((vfiopcidevice, simple_jail(&cfg, "vfio_device.policy")?));
1011 }
1012
David Tolnay2b089fc2019-03-04 15:33:22 -08001013 Ok(pci_devices)
1014}
1015
// A user id / group id pair, used when configuring the identity of a jailed device process.
#[derive(Copy, Clone)]
struct Ids {
    // User id.
    uid: uid_t,
    // Group id.
    gid: gid_t,
}
1021
David Tolnayfd0971d2019-03-04 17:15:57 -08001022fn get_chronos_ids() -> Ids {
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001023 let chronos_user_group = CStr::from_bytes_with_nul(b"chronos\0").unwrap();
David Tolnay2b089fc2019-03-04 15:33:22 -08001024
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001025 let chronos_uid = match get_user_id(&chronos_user_group) {
1026 Ok(u) => u,
1027 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001028 warn!("falling back to current user id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001029 geteuid()
1030 }
1031 };
David Tolnay2b089fc2019-03-04 15:33:22 -08001032
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001033 let chronos_gid = match get_group_id(&chronos_user_group) {
1034 Ok(u) => u,
1035 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001036 warn!("falling back to current group id for 9p: {}", e);
Chirantan Ekboteebd56812018-04-16 19:32:04 -07001037 getegid()
1038 }
1039 };
1040
David Tolnayfd0971d2019-03-04 17:15:57 -08001041 Ids {
David Tolnay2b089fc2019-03-04 15:33:22 -08001042 uid: chronos_uid,
1043 gid: chronos_gid,
David Tolnayfd0971d2019-03-04 17:15:57 -08001044 }
David Tolnay41a6f842019-03-01 16:18:44 -08001045}
1046
David Tolnay48c48292019-03-01 16:54:25 -08001047// Set the uid/gid for the jailed process and give a basic id map. This is
1048// required for bind mounts to work.
David Tolnayfd0971d2019-03-04 17:15:57 -08001049fn add_crosvm_user_to_jail(jail: &mut Minijail, feature: &str) -> Result<Ids> {
David Tolnay48c48292019-03-01 16:54:25 -08001050 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
1051
1052 let crosvm_uid = match get_user_id(&crosvm_user_group) {
1053 Ok(u) => u,
1054 Err(e) => {
1055 warn!("falling back to current user id for {}: {}", feature, e);
1056 geteuid()
1057 }
1058 };
1059
1060 let crosvm_gid = match get_group_id(&crosvm_user_group) {
1061 Ok(u) => u,
1062 Err(e) => {
1063 warn!("falling back to current group id for {}: {}", feature, e);
1064 getegid()
1065 }
1066 };
1067
1068 jail.change_uid(crosvm_uid);
1069 jail.change_gid(crosvm_gid);
1070 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
1071 .map_err(Error::SettingUidMap)?;
1072 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
1073 .map_err(Error::SettingGidMap)?;
1074
David Tolnay41a6f842019-03-01 16:18:44 -08001075 Ok(Ids {
1076 uid: crosvm_uid,
1077 gid: crosvm_gid,
1078 })
David Tolnay48c48292019-03-01 16:54:25 -08001079}
1080
David Tolnayfd0971d2019-03-04 17:15:57 -08001081fn raw_fd_from_path(path: &Path) -> Result<RawFd> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001082 if !path.is_file() {
David Tolnayfd0971d2019-03-04 17:15:57 -08001083 return Err(Error::InvalidFdPath);
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001084 }
1085 let raw_fd = path
1086 .file_name()
1087 .and_then(|fd_osstr| fd_osstr.to_str())
1088 .and_then(|fd_str| fd_str.parse::<c_int>().ok())
1089 .ok_or(Error::InvalidFdPath)?;
David Tolnayfd0971d2019-03-04 17:15:57 -08001090 validate_raw_fd(raw_fd).map_err(Error::ValidateRawFd)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001091}
1092
David Tolnayfd0971d2019-03-04 17:15:57 -08001093fn create_input_socket(path: &Path) -> Result<UnixStream> {
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001094 if path.parent() == Some(Path::new("/proc/self/fd")) {
1095 // Safe because we will validate |raw_fd|.
1096 unsafe { Ok(UnixStream::from_raw_fd(raw_fd_from_path(path)?)) }
1097 } else {
David Tolnayfd0971d2019-03-04 17:15:57 -08001098 UnixStream::connect(path).map_err(Error::InputEventsOpen)
Jorge E. Moreiradffec502019-01-14 18:44:49 -08001099 }
1100}
1101
Matt Delco84cf9c02019-10-07 22:38:13 -07001102fn setup_vcpu_signal_handler(use_kvm_signals: bool) -> Result<()> {
1103 if use_kvm_signals {
1104 unsafe {
1105 extern "C" fn handle_signal() {}
1106 // Our signal handler does nothing and is trivially async signal safe.
1107 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1108 .map_err(Error::RegisterSignalHandler)?;
1109 }
1110 block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?;
1111 } else {
1112 unsafe {
1113 extern "C" fn handle_signal() {
1114 Vcpu::set_local_immediate_exit(true);
1115 }
1116 register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
1117 .map_err(Error::RegisterSignalHandler)?;
1118 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001119 }
Mark Ryan6ed5aea2018-04-20 13:52:35 +01001120 Ok(())
1121}
1122
// The VM run mode together with a condition variable used to announce changes to it.
#[derive(Default)]
struct VcpuRunMode {
    // The current run mode, guarded by a mutex.
    mtx: Mutex<VmRunMode>,
    // Notified after `mtx` is updated; waited on while holding the lock on `mtx`.
    cvar: Condvar,
}
1128
1129impl VcpuRunMode {
1130 fn set_and_notify(&self, new_mode: VmRunMode) {
1131 *self.mtx.lock() = new_mode;
1132 self.cvar.notify_all();
1133 }
1134}
1135
// Spawns the thread that runs `vcpu` and returns its join handle.
//
// The thread is named "crosvm_vcpu{cpu_id}". It optionally applies `vcpu_affinity`, configures
// signal delivery according to `use_kvm_signals`, waits on `start_barrier`, and then loops on
// `vcpu.run()`, forwarding port I/O exits to `io_bus` and MMIO exits to `mmio_bus`. When a run is
// interrupted by a signal, the thread consults the shared run mode in `run_mode_arc` and waits
// there until the mode is `Running` or `Exiting`. On leaving the loop (or if signal setup
// failed), `exit_evt` is written to.
//
// Returns `Error::SpawnVcpu` if the thread could not be spawned.
fn run_vcpu(
    mut vcpu: Vcpu,
    cpu_id: u32,
    vcpu_affinity: Vec<usize>,
    start_barrier: Arc<Barrier>,
    io_bus: devices::Bus,
    mmio_bus: devices::Bus,
    exit_evt: EventFd,
    requires_kvmclock_ctrl: bool,
    run_mode_arc: Arc<VcpuRunMode>,
    use_kvm_signals: bool,
) -> Result<JoinHandle<()>> {
    thread::Builder::new()
        .name(format!("crosvm_vcpu{}", cpu_id))
        .spawn(move || {
            // An empty affinity list means "leave the thread's affinity alone". Failure to set
            // the affinity is logged but does not stop the VCPU.
            if vcpu_affinity.len() != 0 {
                if let Err(e) = set_cpu_affinity(vcpu_affinity) {
                    error!("Failed to set CPU affinity: {}", e);
                }
            }

            let mut sig_ok = true;
            if use_kvm_signals {
                // Give the VCPU the thread's blocked-signal set minus SIGRTMIN() + 0 as its
                // signal mask.
                match get_blocked_signals() {
                    Ok(mut v) => {
                        v.retain(|&x| x != SIGRTMIN() + 0);
                        if let Err(e) = vcpu.set_signal_mask(&v) {
                            error!(
                                "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {}",
                                cpu_id, e
                            );
                            sig_ok = false;
                        }
                    }
                    Err(e) => {
                        error!(
                            "Failed to retrieve signal mask for vcpu {} : {}",
                            cpu_id, e
                        );
                        sig_ok = false;
                    }
                };
            } else {
                vcpu.set_thread_id(SIGRTMIN() + 0);
            }

            // The barrier is waited on even when signal setup failed, so the other parties
            // sharing the barrier are not left blocked.
            start_barrier.wait();

            if sig_ok {
                'vcpu_loop: loop {
                    let mut interrupted_by_signal = false;
                    match vcpu.run() {
                        Ok(VcpuExit::IoIn { port, mut size }) => {
                            let mut data = [0; 8];
                            // Clamp oversized requests to the local buffer.
                            if size > data.len() {
                                error!("unsupported IoIn size of {} bytes", size);
                                size = data.len();
                            }
                            io_bus.read(port as u64, &mut data[..size]);
                            if let Err(e) = vcpu.set_data(&data[..size]) {
                                error!("failed to set return data for IoIn: {}", e);
                            }
                        }
                        Ok(VcpuExit::IoOut {
                            port,
                            mut size,
                            data,
                        }) => {
                            if size > data.len() {
                                error!("unsupported IoOut size of {} bytes", size);
                                size = data.len();
                            }
                            io_bus.write(port as u64, &data[..size]);
                        }
                        Ok(VcpuExit::MmioRead { address, size }) => {
                            // NOTE(review): unlike IoIn, `size` is not clamped here; slicing
                            // panics if it ever exceeds 8 — presumably it never does, confirm.
                            let mut data = [0; 8];
                            mmio_bus.read(address, &mut data[..size]);
                            // Setting data for mmio can not fail.
                            let _ = vcpu.set_data(&data[..size]);
                        }
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data,
                        }) => {
                            mmio_bus.write(address, &data[..size]);
                        }
                        // These exits end the VCPU loop.
                        Ok(VcpuExit::Hlt) => break,
                        Ok(VcpuExit::Shutdown) => break,
                        Ok(VcpuExit::SystemEvent(_, _)) => break,
                        Ok(r) => warn!("unexpected vcpu exit: {:?}", r),
                        Err(e) => match e.errno() {
                            // EINTR triggers the run-mode check below; EAGAIN just retries.
                            libc::EINTR => interrupted_by_signal = true,
                            libc::EAGAIN => {}
                            _ => {
                                error!("vcpu hit unknown error: {}", e);
                                break;
                            }
                        },
                    }

                    if interrupted_by_signal {
                        if use_kvm_signals {
                            // Try to clear the signal that we use to kick VCPU if it is pending before
                            // attempting to handle pause requests.
                            if let Err(e) = clear_signal(SIGRTMIN() + 0) {
                                error!("failed to clear pending signal: {}", e);
                                break;
                            }
                        } else {
                            vcpu.set_immediate_exit(false);
                        }
                        let mut run_mode_lock = run_mode_arc.mtx.lock();
                        loop {
                            match *run_mode_lock {
                                VmRunMode::Running => break,
                                VmRunMode::Suspending => {
                                    // On KVM implementations that use a paravirtualized clock (e.g.
                                    // x86), a flag must be set to indicate to the guest kernel that
                                    // a VCPU was suspended. The guest kernel will use this flag to
                                    // prevent the soft lockup detection from triggering when this
                                    // VCPU resumes, which could happen days later in realtime.
                                    if requires_kvmclock_ctrl {
                                        if let Err(e) = vcpu.kvmclock_ctrl() {
                                            error!("failed to signal to kvm that vcpu {} is being suspended: {}", cpu_id, e);
                                        }
                                    }
                                }
                                VmRunMode::Exiting => break 'vcpu_loop,
                            }
                            // Give ownership of our exclusive lock to the condition variable that
                            // will block. When the condition variable is notified, `wait` will
                            // unblock and return a new exclusive lock.
                            run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
                        }
                    }
                }
            }
            // Written on every way out of the thread body, including signal-setup failure.
            exit_evt
                .write(1)
                .expect("failed to signal vcpu exit eventfd");
        })
        .map_err(Error::SpawnVcpu)
}
1280
// Reads the contents of a file and converts the whitespace-separated fields into a Vec of u64s.
// Returns an error if the file cannot be read, is not valid UTF-8, or if any of the fields fail
// to parse. A file with no fields yields an empty Vec.
fn file_fields_to_u64<P: AsRef<Path>>(path: P) -> io::Result<Vec<u64>> {
    // Read the whole file. A single read() into a small fixed-size buffer may return only part
    // of the contents, which can cut a number in half and silently produce a wrong (but still
    // parseable) value, or drop trailing fields entirely.
    let content = std::fs::read_to_string(path)?;

    content
        .split_whitespace()
        .map(|field| {
            field
                .parse::<u64>()
                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
        })
        .collect()
}
1300
1301// Reads the contents of a file and converts them into a u64, and if there
1302// are multiple fields it only returns the first one.
1303fn file_to_u64<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1304 file_fields_to_u64(path)?
1305 .into_iter()
1306 .next()
1307 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "empty file"))
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001308}
1309
Dylan Reid059a1882018-07-23 17:58:09 -07001310pub fn run_config(cfg: Config) -> Result<()> {
Lepton Wu9105e9f2019-03-14 11:38:31 -07001311 if cfg.sandbox {
Dylan Reid059a1882018-07-23 17:58:09 -07001312 // Printing something to the syslog before entering minijail so that libc's syslogger has a
1313 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
1314 // access to those files will not be possible.
1315 info!("crosvm entering multiprocess mode");
1316 }
1317
Jingkui Wang100e6e42019-03-08 20:41:57 -08001318 let (usb_control_socket, usb_provider) =
David Tolnay5fb3f512019-04-12 19:22:33 -07001319 HostBackendDeviceProvider::new().map_err(Error::CreateUsbProvider)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001320 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
1321 // before any jailed devices have been spawned, so that we can catch any of them that fail very
1322 // quickly.
1323 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
1324
David Tolnay2b089fc2019-03-04 15:33:22 -08001325 let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
1326 Some(File::open(initrd_path).map_err(|e| Error::OpenInitrd(initrd_path.clone(), e))?)
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001327 } else {
1328 None
1329 };
1330
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001331 let vm_image = match cfg.executable_path {
1332 Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
1333 File::open(kernel_path).map_err(|e| Error::OpenKernel(kernel_path.to_path_buf(), e))?,
1334 ),
1335 Some(Executable::Bios(ref bios_path)) => VmImage::Bios(
1336 File::open(bios_path).map_err(|e| Error::OpenBios(bios_path.to_path_buf(), e))?,
1337 ),
1338 _ => panic!("Did not receive a bios or kernel, should be impossible."),
1339 };
1340
Dylan Reid059a1882018-07-23 17:58:09 -07001341 let components = VmComponents {
Jakub Staronf55f75d2019-04-26 11:22:51 -07001342 memory_size: (cfg.memory.unwrap_or(256) << 20) as u64,
Dylan Reid059a1882018-07-23 17:58:09 -07001343 vcpu_count: cfg.vcpu_count.unwrap_or(1),
Daniel Verkamp107edb32019-04-05 09:58:48 -07001344 vcpu_affinity: cfg.vcpu_affinity.clone(),
Cody Schuffelen6d1ab502019-05-21 12:12:38 -07001345 vm_image,
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001346 android_fstab: cfg
1347 .android_fstab
1348 .as_ref()
David Tolnay2b089fc2019-03-04 15:33:22 -08001349 .map(|x| File::open(x).map_err(|e| Error::OpenAndroidFstab(x.to_path_buf(), e)))
Tristan Muntsinger4133b012018-12-21 16:01:56 -08001350 .map_or(Ok(None), |v| v.map(Some))?,
Daniel Verkampe403f5c2018-12-11 16:29:26 -08001351 initrd_image,
Daniel Verkampaac28132018-10-15 14:58:48 -07001352 extra_kernel_params: cfg.params.clone(),
1353 wayland_dmabuf: cfg.wayland_dmabuf,
Dylan Reid059a1882018-07-23 17:58:09 -07001354 };
1355
Zach Reiznera60744b2019-02-13 17:33:32 -08001356 let control_server_socket = match &cfg.socket_path {
1357 Some(path) => Some(UnlinkUnixSeqpacketListener(
1358 UnixSeqpacketListener::bind(path).map_err(Error::CreateSocket)?,
1359 )),
1360 None => None,
Dylan Reid059a1882018-07-23 17:58:09 -07001361 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001362
1363 let mut control_sockets = Vec::new();
Zach Reizner55a9e502018-10-03 10:22:32 -07001364 let (wayland_host_socket, wayland_device_socket) =
Gurchetan Singh53edb812019-05-22 08:57:16 -07001365 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1366 control_sockets.push(TaggedControlSocket::VmMemory(wayland_host_socket));
Dylan Reid059a1882018-07-23 17:58:09 -07001367 // Balloon gets a special socket so balloon requests can be forwarded from the main process.
Zach Reizner55a9e502018-10-03 10:22:32 -07001368 let (balloon_host_socket, balloon_device_socket) =
Jakub Staron1f828d72019-04-11 12:49:29 -07001369 msg_socket::pair::<BalloonControlCommand, ()>().map_err(Error::CreateSocket)?;
Dylan Reid059a1882018-07-23 17:58:09 -07001370
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001371 // Create one control socket per disk.
1372 let mut disk_device_sockets = Vec::new();
1373 let mut disk_host_sockets = Vec::new();
1374 let disk_count = cfg.disks.len();
1375 for _ in 0..disk_count {
1376 let (disk_host_socket, disk_device_socket) =
Jakub Staronecf81e02019-04-11 11:43:39 -07001377 msg_socket::pair::<DiskControlCommand, DiskControlResult>()
1378 .map_err(Error::CreateSocket)?;
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001379 disk_host_sockets.push(disk_host_socket);
Jakub Starone7c59052019-04-09 12:31:14 -07001380 disk_device_sockets.push(disk_device_socket);
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001381 }
1382
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001383 let (gpu_host_socket, gpu_device_socket) =
1384 msg_socket::pair::<VmMemoryResponse, VmMemoryRequest>().map_err(Error::CreateSocket)?;
1385 control_sockets.push(TaggedControlSocket::VmMemory(gpu_host_socket));
1386
Lepton Wu20333e42019-03-14 10:48:03 -07001387 let sandbox = cfg.sandbox;
Trent Begin17ccaad2019-04-17 13:51:25 -06001388 let linux = Arch::build_vm(
1389 components,
1390 cfg.split_irqchip,
1391 &cfg.serial_parameters,
Zach Reiznera8adff02019-08-13 11:20:14 -07001392 simple_jail(&cfg, "serial.policy")?,
Jakub Starona3411ea2019-04-24 10:55:25 -07001393 |mem, vm, sys_allocator, exit_evt| {
Trent Begin17ccaad2019-04-17 13:51:25 -06001394 create_devices(
1395 &cfg,
Jakub Starona3411ea2019-04-24 10:55:25 -07001396 mem,
1397 vm,
1398 sys_allocator,
1399 exit_evt,
Xiong Zhanga5d248c2019-09-17 14:17:19 -07001400 &mut control_sockets,
Trent Begin17ccaad2019-04-17 13:51:25 -06001401 wayland_device_socket,
Gurchetan Singh96beafc2019-05-15 09:46:52 -07001402 gpu_device_socket,
Trent Begin17ccaad2019-04-17 13:51:25 -06001403 balloon_device_socket,
1404 &mut disk_device_sockets,
1405 usb_provider,
1406 )
1407 },
1408 )
David Tolnaybe034262019-03-04 17:48:36 -08001409 .map_err(Error::BuildVm)?;
Lepton Wu60893882018-11-21 11:06:18 -08001410
1411 let _render_node_host = ();
1412 #[cfg(feature = "gpu-forward")]
1413 let (_render_node_host, linux) = {
1414 // Rebinds linux as mutable.
1415 let mut linux = linux;
1416
1417 // Reserve memory range for GPU buffer allocation in advance to bypass region count
1418 // limitation. We use mremap/MAP_FIXED later to make sure GPU buffers fall into this range.
1419 let gpu_mmap =
1420 MemoryMapping::new_protection(RENDER_NODE_HOST_SIZE as usize, Protection::none())
1421 .map_err(Error::ReserveGpuMemory)?;
1422
Xiong Zhang383b3b52019-10-30 14:59:26 +08001423 // Put the non-accessible memory map into high mmio so that no other devices use that
Lepton Wu60893882018-11-21 11:06:18 -08001424 // guest address space.
1425 let gpu_addr = linux
1426 .resources
Xiong Zhang383b3b52019-10-30 14:59:26 +08001427 .mmio_allocator(MmioType::High)
Daniel Prilikd92f81a2019-03-26 14:28:19 -07001428 .allocate(
1429 RENDER_NODE_HOST_SIZE,
1430 Alloc::GpuRenderNode,
1431 "gpu_render_node".to_string(),
1432 )
1433 .map_err(|_| Error::AllocateGpuDeviceAddress)?;
Lepton Wu60893882018-11-21 11:06:18 -08001434
1435 let host = RenderNodeHost::start(&gpu_mmap, gpu_addr, linux.vm.get_memory().clone());
1436
1437 // Makes the gpu memory accessible at allocated address.
1438 linux
1439 .vm
Xiong Zhang383b3b52019-10-30 14:59:26 +08001440 .add_mmio_memory(
Lepton Wu60893882018-11-21 11:06:18 -08001441 GuestAddress(gpu_addr),
1442 gpu_mmap,
1443 /* read_only = */ false,
1444 /* log_dirty_pages = */ false,
1445 )
1446 .map_err(Error::AddGpuDeviceMemory)?;
1447 (host, linux)
1448 };
1449
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001450 run_control(
1451 linux,
Zach Reiznera60744b2019-02-13 17:33:32 -08001452 control_server_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001453 control_sockets,
1454 balloon_host_socket,
1455 &disk_host_sockets,
Jingkui Wang100e6e42019-03-08 20:41:57 -08001456 usb_control_socket,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001457 sigchld_fd,
Lepton Wu60893882018-11-21 11:06:18 -08001458 _render_node_host,
Lepton Wu20333e42019-03-14 10:48:03 -07001459 sandbox,
Daniel Verkamp92f73d72018-12-04 13:17:46 -08001460 )
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001461}
1462
Zach Reizner55a9e502018-10-03 10:22:32 -07001463fn run_control(
1464 mut linux: RunnableLinuxVm,
Zach Reiznera60744b2019-02-13 17:33:32 -08001465 control_server_socket: Option<UnlinkUnixSeqpacketListener>,
Jakub Starond99cd0a2019-04-11 14:09:39 -07001466 mut control_sockets: Vec<TaggedControlSocket>,
Jakub Staron1f828d72019-04-11 12:49:29 -07001467 balloon_host_socket: BalloonControlRequestSocket,
Jakub Staronecf81e02019-04-11 11:43:39 -07001468 disk_host_sockets: &[DiskControlRequestSocket],
Jingkui Wang100e6e42019-03-08 20:41:57 -08001469 usb_control_socket: UsbControlSocket,
Zach Reizner55a9e502018-10-03 10:22:32 -07001470 sigchld_fd: SignalFd,
Lepton Wu60893882018-11-21 11:06:18 -08001471 _render_node_host: RenderNodeHost,
Lepton Wu20333e42019-03-14 10:48:03 -07001472 sandbox: bool,
Zach Reizner55a9e502018-10-03 10:22:32 -07001473) -> Result<()> {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001474 // Paths to get the currently available memory and the low memory threshold.
David Tolnay5bbbf612018-12-01 17:49:30 -08001475 const LOWMEM_MARGIN: &str = "/sys/kernel/mm/chromeos-low_mem/margin";
1476 const LOWMEM_AVAILABLE: &str = "/sys/kernel/mm/chromeos-low_mem/available";
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001477
1478 // The amount of additional memory to claim back from the VM whenever the system is
1479 // low on memory.
1480 const ONE_GB: u64 = (1 << 30);
1481
Dylan Reid0ed91ab2018-05-31 15:42:18 -07001482 let max_balloon_memory = match linux.vm.get_memory().memory_size() {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001483 // If the VM has at least 1.5 GB, the balloon driver can consume all but the last 1 GB.
1484 n if n >= (ONE_GB / 2) * 3 => n - ONE_GB,
1485 // Otherwise, if the VM has at least 500MB the balloon driver will consume at most
1486 // half of it.
1487 n if n >= (ONE_GB / 2) => n / 2,
1488 // Otherwise, the VM is too small for us to take memory away from it.
1489 _ => 0,
1490 };
1491 let mut current_balloon_memory: u64 = 0;
1492 let balloon_memory_increment: u64 = max_balloon_memory / 16;
1493
Zach Reizner5bed0d22018-03-28 02:31:11 -07001494 #[derive(PollToken)]
1495 enum Token {
1496 Exit,
1497 Stdin,
1498 ChildSignal,
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001499 CheckAvailableMemory,
1500 LowMemory,
1501 LowmemTimer,
Zach Reiznera60744b2019-02-13 17:33:32 -08001502 VmControlServer,
Zach Reizner5bed0d22018-03-28 02:31:11 -07001503 VmControl { index: usize },
1504 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001505
1506 let stdin_handle = stdin();
1507 let stdin_lock = stdin_handle.lock();
1508 stdin_lock
1509 .set_raw_mode()
1510 .expect("failed to set terminal raw mode");
1511
Zach Reiznerb2110be2019-07-23 15:55:03 -07001512 let poll_ctx = PollContext::build_with(&[
1513 (&linux.exit_evt, Token::Exit),
1514 (&sigchld_fd, Token::ChildSignal),
1515 ])
1516 .map_err(Error::PollContextAdd)?;
1517
Zach Reizner5bed0d22018-03-28 02:31:11 -07001518 if let Err(e) = poll_ctx.add(&stdin_handle, Token::Stdin) {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001519 warn!("failed to add stdin to poll context: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001520 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001521
1522 if let Some(socket_server) = &control_server_socket {
1523 poll_ctx
1524 .add(socket_server, Token::VmControlServer)
1525 .map_err(Error::PollContextAdd)?;
1526 }
Dylan Reid059a1882018-07-23 17:58:09 -07001527 for (index, socket) in control_sockets.iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001528 poll_ctx
1529 .add(socket.as_ref(), Token::VmControl { index })
1530 .map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001531 }
1532
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001533 // Watch for low memory notifications and take memory back from the VM.
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001534 let low_mem = File::open("/dev/chromeos-low-mem").ok();
David Tolnay64cd5ea2019-04-15 15:56:35 -07001535 if let Some(low_mem) = &low_mem {
Zach Reizner55a9e502018-10-03 10:22:32 -07001536 poll_ctx
1537 .add(low_mem, Token::LowMemory)
1538 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001539 } else {
1540 warn!("Unable to open low mem indicator, maybe not a chrome os kernel");
1541 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001542
1543 // Used to rate limit balloon requests.
1544 let mut lowmem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001545 poll_ctx
1546 .add(&lowmem_timer, Token::LowmemTimer)
1547 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001548
1549 // Used to check whether it's ok to start giving memory back to the VM.
1550 let mut freemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001551 poll_ctx
1552 .add(&freemem_timer, Token::CheckAvailableMemory)
1553 .map_err(Error::PollContextAdd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001554
1555 // Used to add jitter to timer values so that we don't have a thundering herd problem when
1556 // multiple VMs are running.
Daniel Prilik22006042019-01-14 14:19:04 -08001557 let mut simple_rng = SimpleRng::new(
1558 SystemTime::now()
1559 .duration_since(UNIX_EPOCH)
1560 .expect("time went backwards")
1561 .subsec_nanos() as u64,
1562 );
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001563
Lepton Wu20333e42019-03-14 10:48:03 -07001564 if sandbox {
1565 // Before starting VCPUs, in case we started with some capabilities, drop them all.
1566 drop_capabilities().map_err(Error::DropCapabilities)?;
1567 }
Dmitry Torokhov71006072019-03-06 10:56:51 -08001568
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001569 let mut vcpu_handles = Vec::with_capacity(linux.vcpus.len());
1570 let vcpu_thread_barrier = Arc::new(Barrier::new(linux.vcpus.len() + 1));
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001571 let run_mode_arc = Arc::new(VcpuRunMode::default());
Matt Delco84cf9c02019-10-07 22:38:13 -07001572 let use_kvm_signals = !linux.kvm.check_extension(Cap::ImmediateExit);
1573 setup_vcpu_signal_handler(use_kvm_signals)?;
Daniel Verkamp94c35272019-09-12 13:31:30 -07001574 let vcpus = linux.vcpus.split_off(0);
1575 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
Zach Reizner55a9e502018-10-03 10:22:32 -07001576 let handle = run_vcpu(
1577 vcpu,
1578 cpu_id as u32,
Daniel Verkamp107edb32019-04-05 09:58:48 -07001579 linux.vcpu_affinity.clone(),
Zach Reizner55a9e502018-10-03 10:22:32 -07001580 vcpu_thread_barrier.clone(),
1581 linux.io_bus.clone(),
1582 linux.mmio_bus.clone(),
1583 linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?,
Zach Reizner795355a2019-01-16 17:37:57 -08001584 linux.vm.check_extension(Cap::KvmclockCtrl),
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001585 run_mode_arc.clone(),
Matt Delco84cf9c02019-10-07 22:38:13 -07001586 use_kvm_signals,
Zach Reizner55a9e502018-10-03 10:22:32 -07001587 )?;
Dylan Reid059a1882018-07-23 17:58:09 -07001588 vcpu_handles.push(handle);
1589 }
1590 vcpu_thread_barrier.wait();
1591
Zach Reizner39aa26b2017-12-12 18:03:23 -08001592 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -07001593 let events = {
1594 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001595 Ok(v) => v,
1596 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001597 error!("failed to poll: {}", e);
Zach Reizner39aa26b2017-12-12 18:03:23 -08001598 break;
1599 }
1600 }
1601 };
Zach Reiznera60744b2019-02-13 17:33:32 -08001602
1603 let mut vm_control_indices_to_remove = Vec::new();
Zach Reizner5bed0d22018-03-28 02:31:11 -07001604 for event in events.iter_readable() {
1605 match event.token() {
1606 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001607 info!("vcpu requested shutdown");
1608 break 'poll;
1609 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001610 Token::Stdin => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001611 let mut out = [0u8; 64];
1612 match stdin_lock.read_raw(&mut out[..]) {
1613 Ok(0) => {
1614 // Zero-length read indicates EOF. Remove from pollables.
Zach Reizner5bed0d22018-03-28 02:31:11 -07001615 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001616 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001617 Err(e) => {
David Tolnayb4bd00f2019-02-12 17:51:26 -08001618 warn!("error while reading stdin: {}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -07001619 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner55a9e502018-10-03 10:22:32 -07001620 }
Jakub Staronb6515a92019-06-05 15:18:25 -07001621 Ok(count) => {
1622 if let Some(ref stdio_serial) = linux.stdio_serial {
Trent Begin17ccaad2019-04-17 13:51:25 -06001623 stdio_serial
Trent Begin17ccaad2019-04-17 13:51:25 -06001624 .queue_input_bytes(&out[..count])
1625 .expect("failed to queue bytes into serial port");
1626 }
Jakub Staronb6515a92019-06-05 15:18:25 -07001627 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001628 }
1629 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001630 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001631 // Print all available siginfo structs, then exit the loop.
David Tolnayf5032762018-12-03 10:46:45 -08001632 while let Some(siginfo) = sigchld_fd.read().map_err(Error::SignalFd)? {
Zach Reizner3ba00982019-01-23 19:04:43 -08001633 let pid = siginfo.ssi_pid;
1634 let pid_label = match linux.pid_debug_label_map.get(&pid) {
1635 Some(label) => format!("{} (pid {})", label, pid),
1636 None => format!("pid {}", pid),
1637 };
David Tolnayf5032762018-12-03 10:46:45 -08001638 error!(
1639 "child {} died: signo {}, status {}, code {}",
Zach Reizner3ba00982019-01-23 19:04:43 -08001640 pid_label, siginfo.ssi_signo, siginfo.ssi_status, siginfo.ssi_code
David Tolnayf5032762018-12-03 10:46:45 -08001641 );
Zach Reizner39aa26b2017-12-12 18:03:23 -08001642 }
David Tolnayf5032762018-12-03 10:46:45 -08001643 break 'poll;
Zach Reizner39aa26b2017-12-12 18:03:23 -08001644 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001645 Token::CheckAvailableMemory => {
1646 // Acknowledge the timer.
1647 freemem_timer.wait().map_err(Error::TimerFd)?;
1648 if current_balloon_memory == 0 {
1649 // Nothing to see here.
1650 if let Err(e) = freemem_timer.clear() {
1651 warn!("unable to clear available memory check timer: {}", e);
1652 }
1653 continue;
1654 }
1655
1656 // Otherwise see if we can free up some memory.
1657 let margin = file_to_u64(LOWMEM_MARGIN).map_err(Error::ReadLowmemMargin)?;
Zach Reizner55a9e502018-10-03 10:22:32 -07001658 let available =
1659 file_to_u64(LOWMEM_AVAILABLE).map_err(Error::ReadLowmemAvailable)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001660
1661 // `available` and `margin` are specified in MB while `balloon_memory_increment` is in
1662 // bytes. So to correctly compare them we need to turn the increment value into MB.
Zach Reizner55a9e502018-10-03 10:22:32 -07001663 if available >= margin + 2 * (balloon_memory_increment >> 20) {
1664 current_balloon_memory =
1665 if current_balloon_memory >= balloon_memory_increment {
1666 current_balloon_memory - balloon_memory_increment
1667 } else {
1668 0
1669 };
Jakub Staron1f828d72019-04-11 12:49:29 -07001670 let command = BalloonControlCommand::Adjust {
1671 num_bytes: current_balloon_memory,
1672 };
1673 if let Err(e) = balloon_host_socket.send(&command) {
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001674 warn!("failed to send memory value to balloon device: {}", e);
1675 }
1676 }
1677 }
1678 Token::LowMemory => {
David Tolnay64cd5ea2019-04-15 15:56:35 -07001679 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001680 let old_balloon_memory = current_balloon_memory;
Zach Reizner55a9e502018-10-03 10:22:32 -07001681 current_balloon_memory = min(
1682 current_balloon_memory + balloon_memory_increment,
1683 max_balloon_memory,
1684 );
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001685 if current_balloon_memory != old_balloon_memory {
Jakub Staron1f828d72019-04-11 12:49:29 -07001686 let command = BalloonControlCommand::Adjust {
1687 num_bytes: current_balloon_memory,
1688 };
1689 if let Err(e) = balloon_host_socket.send(&command) {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001690 warn!("failed to send memory value to balloon device: {}", e);
1691 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001692 }
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001693
1694 // Stop polling the lowmem device until the timer fires.
1695 poll_ctx.delete(low_mem).map_err(Error::PollContextDelete)?;
1696
1697 // Add some jitter to the timer so that if there are multiple VMs running
1698 // they don't all start ballooning at exactly the same time.
Daniel Prilik22006042019-01-14 14:19:04 -08001699 let lowmem_dur = Duration::from_millis(1000 + simple_rng.rng() % 200);
Zach Reizner55a9e502018-10-03 10:22:32 -07001700 lowmem_timer
1701 .reset(lowmem_dur, None)
1702 .map_err(Error::ResetTimerFd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001703
1704 // Also start a timer to check when we can start giving memory back. Do the
1705 // first check after a minute (with jitter) and subsequent checks after
1706 // every 30 seconds (with jitter).
Daniel Prilik22006042019-01-14 14:19:04 -08001707 let freemem_dur = Duration::from_secs(60 + simple_rng.rng() % 12);
1708 let freemem_int = Duration::from_secs(30 + simple_rng.rng() % 6);
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001709 freemem_timer
1710 .reset(freemem_dur, Some(freemem_int))
1711 .map_err(Error::ResetTimerFd)?;
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001712 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001713 }
1714 Token::LowmemTimer => {
1715 // Acknowledge the timer.
1716 lowmem_timer.wait().map_err(Error::TimerFd)?;
1717
David Tolnay64cd5ea2019-04-15 15:56:35 -07001718 if let Some(low_mem) = &low_mem {
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001719 // Start polling the lowmem device again.
Zach Reizner55a9e502018-10-03 10:22:32 -07001720 poll_ctx
1721 .add(low_mem, Token::LowMemory)
1722 .map_err(Error::PollContextAdd)?;
Dylan Reidf11e6ed2018-07-31 10:24:06 -07001723 }
Chirantan Ekbote448516e2018-07-24 16:07:42 -07001724 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001725 Token::VmControlServer => {
1726 if let Some(socket_server) = &control_server_socket {
1727 match socket_server.accept() {
1728 Ok(socket) => {
1729 poll_ctx
1730 .add(
1731 &socket,
1732 Token::VmControl {
1733 index: control_sockets.len(),
1734 },
1735 )
1736 .map_err(Error::PollContextAdd)?;
Jakub Starond99cd0a2019-04-11 14:09:39 -07001737 control_sockets
1738 .push(TaggedControlSocket::Vm(MsgSocket::new(socket)));
Zach Reiznera60744b2019-02-13 17:33:32 -08001739 }
1740 Err(e) => error!("failed to accept socket: {}", e),
1741 }
1742 }
1743 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001744 Token::VmControl { index } => {
Daniel Verkamp37c4a782019-01-04 10:44:17 -08001745 if let Some(socket) = control_sockets.get(index) {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001746 match socket {
1747 TaggedControlSocket::Vm(socket) => match socket.recv() {
1748 Ok(request) => {
1749 let mut run_mode_opt = None;
1750 let response = request.execute(
1751 &mut run_mode_opt,
1752 &balloon_host_socket,
1753 disk_host_sockets,
1754 &usb_control_socket,
1755 );
1756 if let Err(e) = socket.send(&response) {
1757 error!("failed to send VmResponse: {}", e);
1758 }
1759 if let Some(run_mode) = run_mode_opt {
1760 info!("control socket changed run mode to {}", run_mode);
1761 match run_mode {
1762 VmRunMode::Exiting => {
1763 break 'poll;
1764 }
1765 other => {
1766 run_mode_arc.set_and_notify(other);
1767 for handle in &vcpu_handles {
1768 let _ = handle.kill(SIGRTMIN() + 0);
1769 }
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001770 }
1771 }
1772 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001773 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001774 Err(e) => {
1775 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1776 vm_control_indices_to_remove.push(index);
1777 } else {
1778 error!("failed to recv VmRequest: {}", e);
1779 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001780 }
Jakub Starond99cd0a2019-04-11 14:09:39 -07001781 },
Gurchetan Singh53edb812019-05-22 08:57:16 -07001782 TaggedControlSocket::VmMemory(socket) => match socket.recv() {
Jakub Starond99cd0a2019-04-11 14:09:39 -07001783 Ok(request) => {
1784 let response =
1785 request.execute(&mut linux.vm, &mut linux.resources);
1786 if let Err(e) = socket.send(&response) {
Gurchetan Singh53edb812019-05-22 08:57:16 -07001787 error!("failed to send VmMemoryControlResponse: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07001788 }
1789 }
1790 Err(e) => {
1791 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1792 vm_control_indices_to_remove.push(index);
1793 } else {
Gurchetan Singh53edb812019-05-22 08:57:16 -07001794 error!("failed to recv VmMemoryControlRequest: {}", e);
Jakub Starond99cd0a2019-04-11 14:09:39 -07001795 }
1796 }
1797 },
Xiong Zhang2515b752019-09-19 10:29:02 +08001798 TaggedControlSocket::VmIrq(socket) => match socket.recv() {
1799 Ok(request) => {
1800 let response =
1801 request.execute(&mut linux.vm, &mut linux.resources);
1802 if let Err(e) = socket.send(&response) {
1803 error!("failed to send VmIrqResponse: {}", e);
1804 }
1805 }
1806 Err(e) => {
1807 if let MsgError::BadRecvSize { actual: 0, .. } = e {
1808 vm_control_indices_to_remove.push(index);
1809 } else {
1810 error!("failed to recv VmIrqRequest: {}", e);
1811 }
1812 }
1813 },
Zach Reizner39aa26b2017-12-12 18:03:23 -08001814 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001815 }
1816 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001817 }
1818 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001819
Zach Reizner5bed0d22018-03-28 02:31:11 -07001820 for event in events.iter_hungup() {
Zach Reiznera60744b2019-02-13 17:33:32 -08001821 match event.token() {
1822 Token::Exit => {}
1823 Token::Stdin => {
1824 let _ = poll_ctx.delete(&stdin_handle);
1825 }
1826 Token::ChildSignal => {}
1827 Token::CheckAvailableMemory => {}
1828 Token::LowMemory => {}
1829 Token::LowmemTimer => {}
1830 Token::VmControlServer => {}
1831 Token::VmControl { index } => {
1832 // It's possible more data is readable and buffered while the socket is hungup,
1833 // so don't delete the socket from the poll context until we're sure all the
1834 // data is read.
Jakub Starond99cd0a2019-04-11 14:09:39 -07001835 match control_sockets
1836 .get(index)
1837 .map(|s| s.as_ref().get_readable_bytes())
1838 {
Zach Reiznera60744b2019-02-13 17:33:32 -08001839 Some(Ok(0)) | Some(Err(_)) => vm_control_indices_to_remove.push(index),
1840 Some(Ok(x)) => info!("control index {} has {} bytes readable", index, x),
1841 _ => {}
Zach Reizner55a9e502018-10-03 10:22:32 -07001842 }
Zach Reizner5bed0d22018-03-28 02:31:11 -07001843 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001844 }
1845 }
Zach Reiznera60744b2019-02-13 17:33:32 -08001846
1847 // Sort in reverse so the highest indexes are removed first. This removal algorithm
1848 // preserved correct indexes as each element is removed.
1849 vm_control_indices_to_remove.sort_unstable_by(|a, b| b.cmp(a));
1850 vm_control_indices_to_remove.dedup();
1851 for index in vm_control_indices_to_remove {
1852 control_sockets.swap_remove(index);
1853 if let Some(socket) = control_sockets.get(index) {
1854 poll_ctx
Xiong Zhang44bb3dd2019-04-23 17:09:50 +08001855 .modify(
1856 socket,
1857 WatchingEvents::empty().set_read(),
1858 Token::VmControl { index },
1859 )
Zach Reiznera60744b2019-02-13 17:33:32 -08001860 .map_err(Error::PollContextAdd)?;
1861 }
1862 }
Zach Reizner39aa26b2017-12-12 18:03:23 -08001863 }
1864
Zach Reizner6a8fdd92019-01-16 14:38:41 -08001865 // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
1866 run_mode_arc.set_and_notify(VmRunMode::Exiting);
Dylan Reid059a1882018-07-23 17:58:09 -07001867 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -08001868 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -08001869 Ok(_) => {
1870 if let Err(e) = handle.join() {
1871 error!("failed to join vcpu thread: {:?}", e);
1872 }
1873 }
David Tolnayb4bd00f2019-02-12 17:51:26 -08001874 Err(e) => error!("failed to kill vcpu thread: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -08001875 }
1876 }
1877
Daniel Verkamp94c35272019-09-12 13:31:30 -07001878 // Explicitly drop the VM structure here to allow the devices to clean up before the
1879 // control sockets are closed when this function exits.
1880 mem::drop(linux);
1881
Zach Reizner39aa26b2017-12-12 18:03:23 -08001882 stdin_lock
1883 .set_canon_mode()
1884 .expect("failed to restore canonical mode for terminal");
1885
1886 Ok(())
1887}