Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 1 | // Copyright 2017 The Chromium OS Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | use std; |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 6 | use std::cmp::min; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 7 | use std::ffi::CStr; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 8 | use std::fmt; |
Sonny Rao | ed517d1 | 2018-02-13 22:09:43 -0800 | [diff] [blame] | 9 | use std::error; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 10 | use std::fs::{File, OpenOptions}; |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 11 | use std::io::{self, Read, stdin}; |
| 12 | use std::mem; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 13 | use std::os::unix::io::{FromRawFd, RawFd}; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 14 | use std::os::unix::net::UnixDatagram; |
| 15 | use std::path::{Path, PathBuf}; |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 16 | use std::str; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 17 | use std::sync::atomic::{AtomicBool, Ordering}; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 18 | use std::sync::{Arc, Barrier}; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 19 | use std::thread; |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 20 | use std::time::Duration; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 21 | use std::thread::JoinHandle; |
| 22 | |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 23 | use libc::{self, c_int}; |
| 24 | use rand::thread_rng; |
| 25 | use rand::distributions::{IndependentSample, Range}; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 26 | |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 27 | use byteorder::{ByteOrder, LittleEndian}; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 28 | use devices; |
| 29 | use io_jail::{self, Minijail}; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 30 | use kvm::*; |
Jason D. Clinton | 865323d | 2017-09-27 22:04:03 -0600 | [diff] [blame] | 31 | use net_util::Tap; |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 32 | use qcow::{self, QcowFile}; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 33 | use sys_util::*; |
| 34 | use sys_util; |
Jason D. Clinton | 865323d | 2017-09-27 22:04:03 -0600 | [diff] [blame] | 35 | use vhost; |
Dylan Reid | 228e4a6 | 2018-06-07 15:42:41 -0700 | [diff] [blame] | 36 | use vm_control::VmRequest; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 37 | |
| 38 | use Config; |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 39 | use DiskType; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 40 | use VirtIoDeviceInfo; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 41 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 42 | use arch::{self, LinuxArch, RunnableLinuxVm, VirtioDeviceStub, VmComponents}; |
Sonny Rao | ed517d1 | 2018-02-13 22:09:43 -0800 | [diff] [blame] | 43 | |
| 44 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| 45 | use x86_64::X8664arch as Arch; |
Sonny Rao | 2ffa0cb | 2018-02-26 17:27:40 -0800 | [diff] [blame] | 46 | #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] |
| 47 | use aarch64::AArch64 as Arch; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 48 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 49 | #[derive(Debug)] |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 50 | pub enum Error { |
Dylan Reid | 295ccac | 2017-11-06 14:06:24 -0800 | [diff] [blame] | 51 | BalloonDeviceNew(devices::virtio::BalloonError), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 52 | BlockDeviceNew(sys_util::Error), |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 53 | BlockSignal(sys_util::signal::Error), |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 54 | BuildingVm(Box<error::Error>), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 55 | CloneEventFd(sys_util::Error), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 56 | CreateEventFd(sys_util::Error), |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 57 | CreatePollContext(sys_util::Error), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 58 | CreateSignalFd(sys_util::SignalFdError), |
| 59 | CreateSocket(io::Error), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 60 | CreateTimerFd(sys_util::Error), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 61 | DeviceJail(io_jail::Error), |
| 62 | DevicePivotRoot(io_jail::Error), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 63 | Disk(io::Error), |
Stephen Barber | c79de2d | 2018-02-21 14:17:27 -0800 | [diff] [blame] | 64 | DiskImageLock(sys_util::Error), |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 65 | FailedCLOEXECCheck, |
| 66 | FailedToDupFd, |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 67 | InvalidFdPath, |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 68 | NetDeviceNew(devices::virtio::NetError), |
| 69 | NoVarEmpty, |
| 70 | OpenKernel(PathBuf, io::Error), |
Chirantan Ekbote | ebd5681 | 2018-04-16 19:32:04 -0700 | [diff] [blame] | 71 | P9DeviceNew(devices::virtio::P9Error), |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 72 | PollContextAdd(sys_util::Error), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 73 | PollContextDelete(sys_util::Error), |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 74 | QcowDeviceCreate(qcow::Error), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 75 | ReadLowmemAvailable(io::Error), |
| 76 | ReadLowmemMargin(io::Error), |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 77 | RegisterBalloon(arch::MmioRegisterError), |
| 78 | RegisterBlock(arch::MmioRegisterError), |
| 79 | RegisterGpu(arch::MmioRegisterError), |
| 80 | RegisterNet(arch::MmioRegisterError), |
| 81 | RegisterP9(arch::MmioRegisterError), |
| 82 | RegisterRng(arch::MmioRegisterError), |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 83 | RegisterSignalHandler(sys_util::Error), |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 84 | RegisterWayland(arch::MmioRegisterError), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 85 | ResetTimerFd(sys_util::Error), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 86 | RngDeviceNew(devices::virtio::RngError), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 87 | SettingGidMap(io_jail::Error), |
| 88 | SettingUidMap(io_jail::Error), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 89 | SignalFd(sys_util::SignalFdError), |
| 90 | SpawnVcpu(io::Error), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 91 | TimerFd(sys_util::Error), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 92 | VhostNetDeviceNew(devices::virtio::vhost::Error), |
| 93 | VhostVsockDeviceNew(devices::virtio::vhost::Error), |
| 94 | WaylandDeviceNew(sys_util::Error), |
Sonny Rao | ed517d1 | 2018-02-13 22:09:43 -0800 | [diff] [blame] | 95 | LoadKernel(Box<error::Error>), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 96 | } |
| 97 | |
| 98 | impl fmt::Display for Error { |
| 99 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 100 | match self { |
Dylan Reid | 295ccac | 2017-11-06 14:06:24 -0800 | [diff] [blame] | 101 | &Error::BalloonDeviceNew(ref e) => write!(f, "failed to create balloon: {:?}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 102 | &Error::BlockDeviceNew(ref e) => write!(f, "failed to create block device: {:?}", e), |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 103 | &Error::BlockSignal(ref e) => write!(f, "failed to block signal: {:?}", e), |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 104 | &Error::BuildingVm(ref e) => { |
| 105 | write!(f, "The architecture failed to build the vm: {:?}", e) |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 106 | } |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 107 | &Error::CloneEventFd(ref e) => write!(f, "failed to clone eventfd: {:?}", e), |
| 108 | &Error::CreateEventFd(ref e) => write!(f, "failed to create eventfd: {:?}", e), |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 109 | &Error::CreatePollContext(ref e) => write!(f, "failed to create poll context: {:?}", e), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 110 | &Error::CreateSignalFd(ref e) => write!(f, "failed to create signalfd: {:?}", e), |
| 111 | &Error::CreateSocket(ref e) => write!(f, "failed to create socket: {}", e), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 112 | &Error::CreateTimerFd(ref e) => write!(f, "failed to create timerfd: {}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 113 | &Error::DeviceJail(ref e) => write!(f, "failed to jail device: {}", e), |
| 114 | &Error::DevicePivotRoot(ref e) => write!(f, "failed to pivot root device: {}", e), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 115 | &Error::Disk(ref e) => write!(f, "failed to load disk image: {}", e), |
Stephen Barber | c79de2d | 2018-02-21 14:17:27 -0800 | [diff] [blame] | 116 | &Error::DiskImageLock(ref e) => write!(f, "failed to lock disk image: {:?}", e), |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 117 | &Error::FailedCLOEXECCheck => { |
| 118 | write!(f, "/proc/self/fd argument failed check for CLOEXEC") |
| 119 | } |
| 120 | &Error::FailedToDupFd => write!(f, "failed to dup fd from /proc/self/fd"), |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 121 | &Error::InvalidFdPath => write!(f, "failed parsing a /proc/self/fd/*"), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 122 | &Error::NetDeviceNew(ref e) => write!(f, "failed to set up virtio networking: {:?}", e), |
| 123 | &Error::NoVarEmpty => write!(f, "/var/empty doesn't exist, can't jail devices."), |
| 124 | &Error::OpenKernel(ref p, ref e) => { |
| 125 | write!(f, "failed to open kernel image {:?}: {}", p, e) |
| 126 | } |
Chirantan Ekbote | ebd5681 | 2018-04-16 19:32:04 -0700 | [diff] [blame] | 127 | &Error::P9DeviceNew(ref e) => write!(f, "failed to create 9p device: {}", e), |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 128 | &Error::PollContextAdd(ref e) => write!(f, "failed to add fd to poll context: {:?}", e), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 129 | &Error::PollContextDelete(ref e) => { |
| 130 | write!(f, "failed to remove fd from poll context: {:?}", e) |
| 131 | } |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 132 | &Error::QcowDeviceCreate(ref e) => { |
| 133 | write!(f, "failed to read qcow formatted file {:?}", e) |
| 134 | } |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 135 | &Error::ReadLowmemAvailable(ref e) => { |
| 136 | write!(f, "failed to read /sys/kernel/mm/chromeos-low_mem/available: {}", e) |
| 137 | } |
| 138 | &Error::ReadLowmemMargin(ref e) => { |
| 139 | write!(f, "failed to read /sys/kernel/mm/chromeos-low_mem/margin: {}", e) |
| 140 | } |
Dylan Reid | 295ccac | 2017-11-06 14:06:24 -0800 | [diff] [blame] | 141 | &Error::RegisterBalloon(ref e) => { |
| 142 | write!(f, "error registering balloon device: {:?}", e) |
| 143 | }, |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 144 | &Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e), |
Zach Reizner | 3a8100a | 2017-09-13 19:15:43 -0700 | [diff] [blame] | 145 | &Error::RegisterGpu(ref e) => write!(f, "error registering gpu device: {:?}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 146 | &Error::RegisterNet(ref e) => write!(f, "error registering net device: {:?}", e), |
Chirantan Ekbote | ebd5681 | 2018-04-16 19:32:04 -0700 | [diff] [blame] | 147 | &Error::RegisterP9(ref e) => write!(f, "error registering 9p device: {:?}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 148 | &Error::RegisterRng(ref e) => write!(f, "error registering rng device: {:?}", e), |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 149 | &Error::RegisterSignalHandler(ref e) => { |
| 150 | write!(f, "error registering signal handler: {:?}", e) |
| 151 | } |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 152 | &Error::RegisterWayland(ref e) => write!(f, "error registering wayland device: {}", e), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 153 | &Error::ResetTimerFd(ref e) => write!(f, "failed to reset timerfd: {}", e), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 154 | &Error::RngDeviceNew(ref e) => write!(f, "failed to set up rng: {:?}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 155 | &Error::SettingGidMap(ref e) => write!(f, "error setting GID map: {}", e), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 156 | &Error::SettingUidMap(ref e) => write!(f, "error setting UID map: {}", e), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 157 | &Error::SignalFd(ref e) => write!(f, "failed to read signal fd: {:?}", e), |
| 158 | &Error::SpawnVcpu(ref e) => write!(f, "failed to spawn VCPU thread: {:?}", e), |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 159 | &Error::TimerFd(ref e) => write!(f, "failed to read timer fd: {:?}", e), |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 160 | &Error::VhostNetDeviceNew(ref e) => { |
| 161 | write!(f, "failed to set up vhost networking: {:?}", e) |
| 162 | } |
| 163 | &Error::VhostVsockDeviceNew(ref e) => { |
| 164 | write!(f, "failed to set up virtual socket device: {:?}", e) |
| 165 | } |
| 166 | &Error::WaylandDeviceNew(ref e) => { |
| 167 | write!(f, "failed to create wayland device: {:?}", e) |
| 168 | } |
Sonny Rao | ed517d1 | 2018-02-13 22:09:43 -0800 | [diff] [blame] | 169 | &Error::LoadKernel(ref e) => write!(f, "failed to load kernel: {}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 174 | impl std::error::Error for Error { |
| 175 | fn description(&self) -> &str { |
| 176 | "Some device failure" |
| 177 | } |
| 178 | } |
| 179 | |
// Shorthand for results whose failure type is this module's `Error` enum.
type Result<T> = std::result::Result<T, Error>;
| 181 | |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 182 | // Verifies that |raw_fd| is actually owned by this process and duplicates it to ensure that |
| 183 | // we have a unique handle to it. |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 184 | fn validate_raw_fd(raw_fd: RawFd) -> std::result::Result<RawFd, Box<error::Error>> { |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 185 | // Checking that close-on-exec isn't set helps filter out FDs that were opened by |
| 186 | // crosvm as all crosvm FDs are close on exec. |
| 187 | // Safe because this doesn't modify any memory and we check the return value. |
| 188 | let flags = unsafe { libc::fcntl(raw_fd, libc::F_GETFD) }; |
| 189 | if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 190 | return Err(Box::new(Error::FailedCLOEXECCheck)); |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 191 | } |
| 192 | |
| 193 | // Duplicate the fd to ensure that we don't accidentally close an fd previously |
| 194 | // opened by another subsystem. Safe because this doesn't modify any memory and |
| 195 | // we check the return value. |
| 196 | let dup_fd = unsafe { libc::fcntl(raw_fd, libc::F_DUPFD_CLOEXEC, 0) }; |
| 197 | if dup_fd < 0 { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 198 | return Err(Box::new(Error::FailedToDupFd)); |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 199 | } |
| 200 | Ok(dup_fd as RawFd) |
| 201 | } |
| 202 | |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 203 | fn create_base_minijail(root: &Path, seccomp_policy: &Path) -> Result<Minijail> { |
| 204 | // All child jails run in a new user namespace without any users mapped, |
| 205 | // they run as nobody unless otherwise configured. |
| 206 | let mut j = Minijail::new().map_err(|e| Error::DeviceJail(e))?; |
| 207 | j.namespace_pids(); |
| 208 | j.namespace_user(); |
| 209 | j.namespace_user_disable_setgroups(); |
| 210 | // Don't need any capabilities. |
| 211 | j.use_caps(0); |
| 212 | // Create a new mount namespace with an empty root FS. |
| 213 | j.namespace_vfs(); |
| 214 | j.enter_pivot_root(root) |
| 215 | .map_err(|e| Error::DevicePivotRoot(e))?; |
| 216 | // Run in an empty network namespace. |
| 217 | j.namespace_net(); |
| 218 | // Apply the block device seccomp policy. |
| 219 | j.no_new_privs(); |
Stephen Barber | 3b1d8a5 | 2018-01-06 17:34:51 -0800 | [diff] [blame] | 220 | // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill |
| 221 | // the entire device process if a worker thread commits a seccomp violation. |
| 222 | j.set_seccomp_filter_tsync(); |
Zach Reizner | 043ddc5 | 2018-04-03 20:47:21 -0700 | [diff] [blame] | 223 | #[cfg(debug_assertions)] |
| 224 | j.log_seccomp_filter_failures(); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 225 | j.parse_seccomp_filters(seccomp_policy) |
| 226 | .map_err(|e| Error::DeviceJail(e))?; |
| 227 | j.use_seccomp_filter(); |
| 228 | // Don't do init setup. |
| 229 | j.run_as_init(); |
| 230 | Ok(j) |
| 231 | } |
| 232 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 233 | fn create_virtio_devs(cfg: VirtIoDeviceInfo, |
| 234 | mem: &GuestMemory, |
| 235 | _exit_evt: &EventFd, |
| 236 | wayland_device_socket: UnixDatagram, |
| 237 | balloon_device_socket: UnixDatagram) |
| 238 | -> std::result::Result<Vec<VirtioDeviceStub>, Box<error::Error>> { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 239 | static DEFAULT_PIVOT_ROOT: &'static str = "/var/empty"; |
Dylan Reid | ef7352f | 2018-05-17 18:47:11 -0700 | [diff] [blame] | 240 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 241 | let mut devs = Vec::new(); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 242 | |
| 243 | // An empty directory for jailed device's pivot root. |
| 244 | let empty_root_path = Path::new(DEFAULT_PIVOT_ROOT); |
| 245 | if cfg.multiprocess && !empty_root_path.exists() { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 246 | return Err(Box::new(Error::NoVarEmpty)); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 247 | } |
| 248 | |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 249 | for disk in &cfg.disks { |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 250 | // Special case '/proc/self/fd/*' paths. The FD is already open, just use it. |
| 251 | let mut raw_image: File = if disk.path.parent() == Some(Path::new("/proc/self/fd")) { |
| 252 | if !disk.path.is_file() { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 253 | return Err(Box::new(Error::InvalidFdPath)); |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 254 | } |
| 255 | let raw_fd = disk.path.file_name() |
| 256 | .and_then(|fd_osstr| fd_osstr.to_str()) |
| 257 | .and_then(|fd_str| fd_str.parse::<c_int>().ok()) |
| 258 | .ok_or(Error::InvalidFdPath)?; |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 259 | // Safe because we will validate |raw_fd|. |
| 260 | unsafe { File::from_raw_fd(validate_raw_fd(raw_fd)?) } |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 261 | } else { |
| 262 | OpenOptions::new() |
| 263 | .read(true) |
Daniel Verkamp | de9ae03 | 2018-08-09 16:26:59 -0700 | [diff] [blame] | 264 | .write(!disk.read_only) |
Dylan Reid | 2056644 | 2018-04-02 15:06:15 -0700 | [diff] [blame] | 265 | .open(&disk.path) |
| 266 | .map_err(|e| Error::Disk(e))? |
| 267 | }; |
Stephen Barber | c79de2d | 2018-02-21 14:17:27 -0800 | [diff] [blame] | 268 | // Lock the disk image to prevent other crosvm instances from using it. |
Daniel Verkamp | de9ae03 | 2018-08-09 16:26:59 -0700 | [diff] [blame] | 269 | let lock_op = if disk.read_only { |
Stephen Barber | c79de2d | 2018-02-21 14:17:27 -0800 | [diff] [blame] | 270 | FlockOperation::LockShared |
Daniel Verkamp | de9ae03 | 2018-08-09 16:26:59 -0700 | [diff] [blame] | 271 | } else { |
| 272 | FlockOperation::LockExclusive |
Stephen Barber | c79de2d | 2018-02-21 14:17:27 -0800 | [diff] [blame] | 273 | }; |
| 274 | flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?; |
| 275 | |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 276 | let block_box: Box<devices::virtio::VirtioDevice> = match disk.disk_type { |
| 277 | DiskType::FlatFile => { // Access as a raw block device. |
Daniel Verkamp | 70589a0 | 2018-08-09 16:27:44 -0700 | [diff] [blame] | 278 | Box::new(devices::virtio::Block::new(raw_image, disk.read_only) |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 279 | .map_err(|e| Error::BlockDeviceNew(e))?) |
| 280 | } |
| 281 | DiskType::Qcow => { // Valid qcow header present |
| 282 | let qcow_image = QcowFile::from(raw_image) |
| 283 | .map_err(|e| Error::QcowDeviceCreate(e))?; |
Daniel Verkamp | 70589a0 | 2018-08-09 16:27:44 -0700 | [diff] [blame] | 284 | Box::new(devices::virtio::Block::new(qcow_image, disk.read_only) |
Dylan Reid | 88624f8 | 2018-01-11 09:20:16 -0800 | [diff] [blame] | 285 | .map_err(|e| Error::BlockDeviceNew(e))?) |
| 286 | } |
| 287 | }; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 288 | let jail = if cfg.multiprocess { |
| 289 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("block_device.policy"); |
| 290 | Some(create_base_minijail(empty_root_path, &policy_path)?) |
| 291 | } |
| 292 | else { |
| 293 | None |
| 294 | }; |
| 295 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 296 | devs.push(VirtioDeviceStub {dev: block_box, jail}); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 297 | } |
| 298 | |
| 299 | let rng_box = Box::new(devices::virtio::Rng::new().map_err(Error::RngDeviceNew)?); |
| 300 | let rng_jail = if cfg.multiprocess { |
| 301 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("rng_device.policy"); |
| 302 | Some(create_base_minijail(empty_root_path, &policy_path)?) |
| 303 | } else { |
| 304 | None |
| 305 | }; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 306 | devs.push(VirtioDeviceStub {dev: rng_box, jail: rng_jail}); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 307 | |
Dylan Reid | 295ccac | 2017-11-06 14:06:24 -0800 | [diff] [blame] | 308 | let balloon_box = Box::new(devices::virtio::Balloon::new(balloon_device_socket) |
| 309 | .map_err(Error::BalloonDeviceNew)?); |
| 310 | let balloon_jail = if cfg.multiprocess { |
| 311 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("balloon_device.policy"); |
| 312 | Some(create_base_minijail(empty_root_path, &policy_path)?) |
| 313 | } else { |
| 314 | None |
| 315 | }; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 316 | devs.push(VirtioDeviceStub {dev: balloon_box, jail: balloon_jail}); |
Dylan Reid | 295ccac | 2017-11-06 14:06:24 -0800 | [diff] [blame] | 317 | |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 318 | // We checked above that if the IP is defined, then the netmask is, too. |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 319 | if let Some(tap_fd) = cfg.tap_fd { |
| 320 | // Safe because we ensure that we get a unique handle to the fd. |
| 321 | let tap = unsafe { Tap::from_raw_fd(validate_raw_fd(tap_fd)?) }; |
| 322 | let net_box = Box::new(devices::virtio::Net::from(tap) |
| 323 | .map_err(|e| Error::NetDeviceNew(e))?); |
| 324 | |
| 325 | let jail = if cfg.multiprocess { |
| 326 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("net_device.policy"); |
| 327 | |
| 328 | Some(create_base_minijail(empty_root_path, &policy_path)?) |
| 329 | } else { |
| 330 | None |
| 331 | }; |
| 332 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 333 | devs.push(VirtioDeviceStub {dev: net_box, jail}); |
Chirantan Ekbote | 5f78721 | 2018-05-31 15:31:31 -0700 | [diff] [blame] | 334 | } else if let Some(host_ip) = cfg.host_ip { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 335 | if let Some(netmask) = cfg.netmask { |
Stephen Barber | 308ff60 | 2018-02-13 22:47:07 -0800 | [diff] [blame] | 336 | if let Some(mac_address) = cfg.mac_address { |
| 337 | let net_box: Box<devices::virtio::VirtioDevice> = if cfg.vhost_net { |
| 338 | Box::new(devices::virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip, |
| 339 | netmask, |
| 340 | mac_address, |
| 341 | &mem) |
| 342 | .map_err(|e| Error::VhostNetDeviceNew(e))?) |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 343 | } else { |
Stephen Barber | 308ff60 | 2018-02-13 22:47:07 -0800 | [diff] [blame] | 344 | Box::new(devices::virtio::Net::<Tap>::new(host_ip, netmask, mac_address) |
| 345 | .map_err(|e| Error::NetDeviceNew(e))?) |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 346 | }; |
| 347 | |
Stephen Barber | 308ff60 | 2018-02-13 22:47:07 -0800 | [diff] [blame] | 348 | let jail = if cfg.multiprocess { |
| 349 | let policy_path: PathBuf = if cfg.vhost_net { |
| 350 | cfg.seccomp_policy_dir.join("vhost_net_device.policy") |
| 351 | } else { |
| 352 | cfg.seccomp_policy_dir.join("net_device.policy") |
| 353 | }; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 354 | |
Stephen Barber | 308ff60 | 2018-02-13 22:47:07 -0800 | [diff] [blame] | 355 | Some(create_base_minijail(empty_root_path, &policy_path)?) |
| 356 | } else { |
| 357 | None |
| 358 | }; |
| 359 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 360 | devs.push(VirtioDeviceStub {dev: net_box, jail}); |
Stephen Barber | 308ff60 | 2018-02-13 22:47:07 -0800 | [diff] [blame] | 361 | } |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 362 | } |
| 363 | } |
| 364 | |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 365 | if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 366 | let jailed_wayland_path = Path::new("/wayland-0"); |
| 367 | |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 368 | let wl_box = Box::new(devices::virtio::Wl::new(if cfg.multiprocess { |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 369 | &jailed_wayland_path |
| 370 | } else { |
| 371 | wayland_socket_path.as_path() |
| 372 | }, |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 373 | wayland_device_socket) |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 374 | .map_err(Error::WaylandDeviceNew)?); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 375 | |
| 376 | let jail = if cfg.multiprocess { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 377 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("wl_device.policy"); |
Chirantan Ekbote | 293c61c | 2018-01-04 16:19:17 -0800 | [diff] [blame] | 378 | let mut jail = create_base_minijail(empty_root_path, &policy_path)?; |
| 379 | |
| 380 | // Create a tmpfs in the device's root directory so that we can bind mount the |
| 381 | // wayland socket into it. The size=67108864 is size=64*1024*1024 or size=64MB. |
| 382 | jail.mount_with_data(Path::new("none"), Path::new("/"), "tmpfs", |
| 383 | (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize, |
| 384 | "size=67108864") |
| 385 | .unwrap(); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 386 | |
| 387 | // Bind mount the wayland socket into jail's root. This is necessary since each |
| 388 | // new wayland context must open() the socket. |
| 389 | jail.mount_bind(wayland_socket_path.as_path(), jailed_wayland_path, true) |
| 390 | .unwrap(); |
| 391 | |
| 392 | // Set the uid/gid for the jailed process, and give a basic id map. This |
| 393 | // is required for the above bind mount to work. |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 394 | let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap(); |
| 395 | let crosvm_uid = match get_user_id(&crosvm_user_group) { |
| 396 | Ok(u) => u, |
| 397 | Err(e) => { |
| 398 | warn!("falling back to current user id for Wayland: {:?}", e); |
| 399 | geteuid() |
| 400 | } |
| 401 | }; |
Chirantan Ekbote | 0ba70d8 | 2018-01-24 13:47:58 -0800 | [diff] [blame] | 402 | let crosvm_gid = match get_group_id(&crosvm_user_group) { |
| 403 | Ok(u) => u, |
| 404 | Err(e) => { |
| 405 | warn!("falling back to current group id for Wayland: {:?}", e); |
| 406 | getegid() |
| 407 | } |
| 408 | }; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 409 | jail.change_uid(crosvm_uid); |
Chirantan Ekbote | 0ba70d8 | 2018-01-24 13:47:58 -0800 | [diff] [blame] | 410 | jail.change_gid(crosvm_gid); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 411 | jail.uidmap(&format!("{0} {0} 1", crosvm_uid)) |
| 412 | .map_err(Error::SettingUidMap)?; |
Chirantan Ekbote | 0ba70d8 | 2018-01-24 13:47:58 -0800 | [diff] [blame] | 413 | jail.gidmap(&format!("{0} {0} 1", crosvm_gid)) |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 414 | .map_err(Error::SettingGidMap)?; |
| 415 | |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 416 | Some(jail) |
| 417 | } else { |
| 418 | None |
| 419 | }; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 420 | devs.push(VirtioDeviceStub { |
| 421 | dev: wl_box, |
| 422 | jail, |
| 423 | }); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 424 | } |
| 425 | |
| 426 | if let Some(cid) = cfg.cid { |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 427 | let vsock_box = Box::new(devices::virtio::vhost::Vsock::new(cid, &mem) |
| 428 | .map_err(Error::VhostVsockDeviceNew)?); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 429 | |
| 430 | let jail = if cfg.multiprocess { |
| 431 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("vhost_vsock_device.policy"); |
| 432 | |
| 433 | Some(create_base_minijail(empty_root_path, &policy_path)?) |
| 434 | } else { |
| 435 | None |
| 436 | }; |
| 437 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 438 | devs.push(VirtioDeviceStub {dev: vsock_box, jail}); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 439 | } |
| 440 | |
Zach Reizner | 3a8100a | 2017-09-13 19:15:43 -0700 | [diff] [blame] | 441 | #[cfg(feature = "gpu")] |
| 442 | { |
| 443 | if cfg.gpu { |
| 444 | let gpu_box = |
| 445 | Box::new(devices::virtio::Gpu::new(_exit_evt |
| 446 | .try_clone() |
| 447 | .map_err(Error::CloneEventFd)?)); |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 448 | let jail = if cfg.multiprocess { |
Zach Reizner | 3a8100a | 2017-09-13 19:15:43 -0700 | [diff] [blame] | 449 | error!("jail for virtio-gpu is unimplemented"); |
| 450 | unimplemented!(); |
| 451 | } else { |
| 452 | None |
| 453 | }; |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 454 | devs.push(VirtioDeviceStub {dev: gpu_box, jail}); |
Zach Reizner | 3a8100a | 2017-09-13 19:15:43 -0700 | [diff] [blame] | 455 | } |
| 456 | } |
| 457 | |
Chirantan Ekbote | ebd5681 | 2018-04-16 19:32:04 -0700 | [diff] [blame] | 458 | let chronos_user_group = CStr::from_bytes_with_nul(b"chronos\0").unwrap(); |
| 459 | let chronos_uid = match get_user_id(&chronos_user_group) { |
| 460 | Ok(u) => u, |
| 461 | Err(e) => { |
| 462 | warn!("falling back to current user id for 9p: {:?}", e); |
| 463 | geteuid() |
| 464 | } |
| 465 | }; |
| 466 | let chronos_gid = match get_group_id(&chronos_user_group) { |
| 467 | Ok(u) => u, |
| 468 | Err(e) => { |
| 469 | warn!("falling back to current group id for 9p: {:?}", e); |
| 470 | getegid() |
| 471 | } |
| 472 | }; |
| 473 | |
| 474 | for &(ref src, ref tag) in &cfg.shared_dirs { |
| 475 | let (jail, root) = if cfg.multiprocess { |
| 476 | let policy_path: PathBuf = cfg.seccomp_policy_dir.join("9p_device.policy"); |
| 477 | let mut jail = create_base_minijail(empty_root_path, &policy_path)?; |
| 478 | |
| 479 | // The shared directory becomes the root of the device's file system. |
| 480 | let root = Path::new("/"); |
| 481 | jail.mount_bind(&src, root, true).unwrap(); |
| 482 | |
| 483 | // Set the uid/gid for the jailed process, and give a basic id map. This |
| 484 | // is required for the above bind mount to work. |
| 485 | jail.change_uid(chronos_uid); |
| 486 | jail.change_gid(chronos_gid); |
| 487 | jail.uidmap(&format!("{0} {0} 1", chronos_uid)) |
| 488 | .map_err(Error::SettingUidMap)?; |
| 489 | jail.gidmap(&format!("{0} {0} 1", chronos_gid)) |
| 490 | .map_err(Error::SettingGidMap)?; |
| 491 | |
| 492 | (Some(jail), root) |
| 493 | } else { |
| 494 | // There's no bind mount so we tell the server to treat the source directory as the |
| 495 | // root. The double deref here converts |src| from a &PathBuf into a &Path. |
| 496 | (None, &**src) |
| 497 | }; |
| 498 | |
| 499 | let p9_box = Box::new(devices::virtio::P9::new(root, tag).map_err(Error::P9DeviceNew)?); |
| 500 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 501 | devs.push(VirtioDeviceStub {dev: p9_box, jail}); |
Chirantan Ekbote | ebd5681 | 2018-04-16 19:32:04 -0700 | [diff] [blame] | 502 | } |
| 503 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 504 | Ok(devs) |
Sonny Rao | bb7da42 | 2018-02-13 20:37:48 -0800 | [diff] [blame] | 505 | } |
| 506 | |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 507 | fn setup_vcpu_signal_handler() -> Result<()> { |
| 508 | unsafe { |
| 509 | extern "C" fn handle_signal() {} |
| 510 | // Our signal handler does nothing and is trivially async signal safe. |
| 511 | register_signal_handler(SIGRTMIN() + 0, handle_signal) |
| 512 | .map_err(Error::RegisterSignalHandler)?; |
| 513 | } |
| 514 | block_signal(SIGRTMIN() + 0).map_err(Error::BlockSignal)?; |
| 515 | Ok(()) |
| 516 | } |
| 517 | |
Sonny Rao | bb7da42 | 2018-02-13 20:37:48 -0800 | [diff] [blame] | 518 | fn run_vcpu(vcpu: Vcpu, |
| 519 | cpu_id: u32, |
| 520 | start_barrier: Arc<Barrier>, |
| 521 | io_bus: devices::Bus, |
| 522 | mmio_bus: devices::Bus, |
| 523 | exit_evt: EventFd, |
| 524 | kill_signaled: Arc<AtomicBool>) -> Result<JoinHandle<()>> { |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 525 | thread::Builder::new() |
| 526 | .name(format!("crosvm_vcpu{}", cpu_id)) |
| 527 | .spawn(move || { |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 528 | let mut sig_ok = true; |
| 529 | match get_blocked_signals() { |
| 530 | Ok(mut v) => { |
| 531 | v.retain(|&x| x != SIGRTMIN() + 0); |
| 532 | if let Err(e) = vcpu.set_signal_mask(&v) { |
| 533 | error!( |
| 534 | "Failed to set the KVM_SIGNAL_MASK for vcpu {} : {:?}", |
| 535 | cpu_id, e |
| 536 | ); |
| 537 | sig_ok = false; |
| 538 | } |
| 539 | } |
| 540 | Err(e) => { |
| 541 | error!( |
| 542 | "Failed to retrieve signal mask for vcpu {} : {:?}", |
| 543 | cpu_id, e |
| 544 | ); |
| 545 | sig_ok = false; |
| 546 | } |
| 547 | }; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 548 | |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 549 | start_barrier.wait(); |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 550 | |
| 551 | while sig_ok { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 552 | let run_res = vcpu.run(); |
| 553 | match run_res { |
| 554 | Ok(run) => { |
| 555 | match run { |
| 556 | VcpuExit::IoIn(addr, data) => { |
| 557 | io_bus.read(addr as u64, data); |
| 558 | } |
| 559 | VcpuExit::IoOut(addr, data) => { |
| 560 | io_bus.write(addr as u64, data); |
| 561 | } |
| 562 | VcpuExit::MmioRead(addr, data) => { |
| 563 | mmio_bus.read(addr, data); |
| 564 | } |
| 565 | VcpuExit::MmioWrite(addr, data) => { |
| 566 | mmio_bus.write(addr, data); |
| 567 | } |
| 568 | VcpuExit::Hlt => break, |
| 569 | VcpuExit::Shutdown => break, |
Sonny Rao | 6ce158f | 2018-03-27 17:12:58 -0700 | [diff] [blame] | 570 | VcpuExit::SystemEvent(_, _) => |
| 571 | //TODO handle reboot and crash events |
| 572 | kill_signaled.store(true, Ordering::SeqCst), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 573 | r => warn!("unexpected vcpu exit: {:?}", r), |
| 574 | } |
| 575 | } |
| 576 | Err(e) => { |
| 577 | match e.errno() { |
| 578 | libc::EAGAIN | libc::EINTR => {}, |
| 579 | _ => { |
| 580 | error!("vcpu hit unknown error: {:?}", e); |
| 581 | break; |
| 582 | } |
| 583 | } |
| 584 | } |
| 585 | } |
| 586 | if kill_signaled.load(Ordering::SeqCst) { |
| 587 | break; |
| 588 | } |
Mark Ryan | 6ed5aea | 2018-04-20 13:52:35 +0100 | [diff] [blame] | 589 | |
| 590 | // Try to clear the signal that we use to kick VCPU if it is |
| 591 | // pending before attempting to handle pause requests. |
| 592 | clear_signal(SIGRTMIN() + 0).expect("failed to clear pending signal"); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 593 | } |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 594 | exit_evt |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 595 | .write(1) |
| 596 | .expect("failed to signal vcpu exit eventfd"); |
Zach Reizner | 8fb5211 | 2017-12-13 16:04:39 -0800 | [diff] [blame] | 597 | }) |
| 598 | .map_err(Error::SpawnVcpu) |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 599 | } |
| 600 | |
/// Parses the leading contents of the file at `path` as a `u64`.
///
/// At most 32 bytes are read, with a single `read` call. Surrounding whitespace
/// is trimmed before parsing. Errors from opening or reading the file are
/// returned unchanged; contents that are not valid UTF-8 or not a valid `u64`
/// are reported as `io::ErrorKind::InvalidData`.
fn file_to_u64<P: AsRef<Path>>(path: P) -> io::Result<u64> {
    let mut raw = [0u8; 32];
    let len = File::open(path)?.read(&mut raw)?;

    let text = match str::from_utf8(&raw[..len]) {
        Ok(s) => s,
        Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidData, e)),
    };

    match text.trim().parse::<u64>() {
        Ok(value) => Ok(value),
        Err(e) => Err(io::Error::new(io::ErrorKind::InvalidData, e)),
    }
}
| 612 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 613 | pub fn run_config(cfg: Config) -> Result<()> { |
| 614 | if cfg.virtio_dev_info.multiprocess { |
| 615 | // Printing something to the syslog before entering minijail so that libc's syslogger has a |
| 616 | // chance to open files necessary for its operation, like `/etc/localtime`. After jailing, |
| 617 | // access to those files will not be possible. |
| 618 | info!("crosvm entering multiprocess mode"); |
| 619 | } |
| 620 | |
| 621 | let pci_devices = devices::PciDeviceList::new(); |
| 622 | |
| 623 | // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this |
| 624 | // before any jailed devices have been spawned, so that we can catch any of them that fail very |
| 625 | // quickly. |
| 626 | let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?; |
| 627 | |
| 628 | let components = VmComponents { |
| 629 | pci_devices, |
| 630 | memory_mb: (cfg.memory.unwrap_or(256) << 20) as u64, |
| 631 | vcpu_count: cfg.vcpu_count.unwrap_or(1), |
| 632 | kernel_image: File::open(cfg.kernel_path.as_path()) |
| 633 | .map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?, |
| 634 | extra_kernel_params: cfg.params, |
| 635 | wayland_dmabuf: cfg.virtio_dev_info.wayland_dmabuf, |
| 636 | }; |
| 637 | |
| 638 | let mut control_sockets = Vec::new(); |
| 639 | if let Some(ref path_string) = cfg.socket_path { |
| 640 | let path = Path::new(path_string); |
| 641 | let dgram = UnixDatagram::bind(path).map_err(Error::CreateSocket)?; |
| 642 | control_sockets.push(UnlinkUnixDatagram(dgram)); |
| 643 | }; |
| 644 | let (wayland_host_socket, wayland_device_socket) = UnixDatagram::pair() |
| 645 | .map_err(Error::CreateSocket)?; |
| 646 | control_sockets.push(UnlinkUnixDatagram(wayland_host_socket)); |
| 647 | // Balloon gets a special socket so balloon requests can be forwarded from the main process. |
| 648 | let (balloon_host_socket, balloon_device_socket) = UnixDatagram::pair() |
| 649 | .map_err(Error::CreateSocket)?; |
| 650 | |
| 651 | let virtio_dev_info = cfg.virtio_dev_info; |
| 652 | let linux = Arch::build_vm(components, |
| 653 | |m, e| create_virtio_devs(virtio_dev_info, m, e, |
| 654 | wayland_device_socket, |
| 655 | balloon_device_socket)) |
| 656 | .map_err(Error::BuildingVm)?; |
| 657 | run_control(linux, control_sockets, balloon_host_socket, sigchld_fd) |
Dylan Reid | 0ed91ab | 2018-05-31 15:42:18 -0700 | [diff] [blame] | 658 | } |
| 659 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 660 | fn run_control(mut linux: RunnableLinuxVm, |
| 661 | control_sockets: Vec<UnlinkUnixDatagram>, |
| 662 | balloon_host_socket: UnixDatagram, |
| 663 | sigchld_fd: SignalFd) -> Result<()> { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 664 | const MAX_VM_FD_RECV: usize = 1; |
| 665 | |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 666 | // Paths to get the currently available memory and the low memory threshold. |
| 667 | const LOWMEM_MARGIN: &'static str = "/sys/kernel/mm/chromeos-low_mem/margin"; |
| 668 | const LOWMEM_AVAILABLE: &'static str = "/sys/kernel/mm/chromeos-low_mem/available"; |
| 669 | |
| 670 | // The amount of additional memory to claim back from the VM whenever the system is |
| 671 | // low on memory. |
| 672 | const ONE_GB: u64 = (1 << 30); |
| 673 | |
Dylan Reid | 0ed91ab | 2018-05-31 15:42:18 -0700 | [diff] [blame] | 674 | let max_balloon_memory = match linux.vm.get_memory().memory_size() { |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 675 | // If the VM has at least 1.5 GB, the balloon driver can consume all but the last 1 GB. |
| 676 | n if n >= (ONE_GB / 2) * 3 => n - ONE_GB, |
| 677 | // Otherwise, if the VM has at least 500MB the balloon driver will consume at most |
| 678 | // half of it. |
| 679 | n if n >= (ONE_GB / 2) => n / 2, |
| 680 | // Otherwise, the VM is too small for us to take memory away from it. |
| 681 | _ => 0, |
| 682 | }; |
| 683 | let mut current_balloon_memory: u64 = 0; |
| 684 | let balloon_memory_increment: u64 = max_balloon_memory / 16; |
| 685 | |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 686 | #[derive(PollToken)] |
| 687 | enum Token { |
| 688 | Exit, |
| 689 | Stdin, |
| 690 | ChildSignal, |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 691 | CheckAvailableMemory, |
| 692 | LowMemory, |
| 693 | LowmemTimer, |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 694 | VmControl { index: usize }, |
| 695 | } |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 696 | |
| 697 | let stdin_handle = stdin(); |
| 698 | let stdin_lock = stdin_handle.lock(); |
| 699 | stdin_lock |
| 700 | .set_raw_mode() |
| 701 | .expect("failed to set terminal raw mode"); |
| 702 | |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 703 | let poll_ctx = PollContext::new().map_err(Error::CreatePollContext)?; |
Dylan Reid | 0ed91ab | 2018-05-31 15:42:18 -0700 | [diff] [blame] | 704 | poll_ctx.add(&linux.exit_evt, Token::Exit).map_err(Error::PollContextAdd)?; |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 705 | if let Err(e) = poll_ctx.add(&stdin_handle, Token::Stdin) { |
| 706 | warn!("failed to add stdin to poll context: {:?}", e); |
| 707 | } |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 708 | poll_ctx.add(&sigchld_fd, Token::ChildSignal).map_err(Error::PollContextAdd)?; |
| 709 | for (index, socket) in control_sockets.iter().enumerate() { |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 710 | poll_ctx.add(socket.as_ref(), Token::VmControl{ index }).map_err(Error::PollContextAdd)?; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 711 | } |
| 712 | |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 713 | // Watch for low memory notifications and take memory back from the VM. |
Dylan Reid | f11e6ed | 2018-07-31 10:24:06 -0700 | [diff] [blame] | 714 | let low_mem = File::open("/dev/chromeos-low-mem").ok(); |
| 715 | if let Some(ref low_mem) = low_mem { |
| 716 | poll_ctx.add(low_mem, Token::LowMemory).map_err(Error::PollContextAdd)?; |
| 717 | } else { |
| 718 | warn!("Unable to open low mem indicator, maybe not a chrome os kernel"); |
| 719 | } |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 720 | |
| 721 | // Used to rate limit balloon requests. |
| 722 | let mut lowmem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?; |
| 723 | poll_ctx.add(&lowmem_timer, Token::LowmemTimer).map_err(Error::PollContextAdd)?; |
| 724 | |
| 725 | // Used to check whether it's ok to start giving memory back to the VM. |
| 726 | let mut freemem_timer = TimerFd::new().map_err(Error::CreateTimerFd)?; |
| 727 | poll_ctx.add(&freemem_timer, Token::CheckAvailableMemory).map_err(Error::PollContextAdd)?; |
| 728 | |
| 729 | // Used to add jitter to timer values so that we don't have a thundering herd problem when |
| 730 | // multiple VMs are running. |
| 731 | let mut rng = thread_rng(); |
| 732 | let lowmem_jitter_ms = Range::new(0, 200); |
| 733 | let freemem_jitter_secs = Range::new(0, 12); |
| 734 | let interval_jitter_secs = Range::new(0, 6); |
| 735 | |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 736 | let mut vcpu_handles = Vec::with_capacity(linux.vcpus.len() as usize); |
| 737 | let vcpu_thread_barrier = Arc::new(Barrier::new((linux.vcpus.len() + 1) as usize)); |
| 738 | let kill_signaled = Arc::new(AtomicBool::new(false)); |
| 739 | setup_vcpu_signal_handler()?; |
| 740 | for (cpu_id, vcpu) in linux.vcpus.into_iter().enumerate() { |
| 741 | let handle = run_vcpu(vcpu, |
| 742 | cpu_id as u32, |
| 743 | vcpu_thread_barrier.clone(), |
| 744 | linux.io_bus.clone(), |
| 745 | linux.mmio_bus.clone(), |
| 746 | linux.exit_evt.try_clone().map_err(Error::CloneEventFd)?, |
| 747 | kill_signaled.clone())?; |
| 748 | vcpu_handles.push(handle); |
| 749 | } |
| 750 | vcpu_thread_barrier.wait(); |
| 751 | |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 752 | let mut scm = Scm::new(MAX_VM_FD_RECV); |
| 753 | |
| 754 | 'poll: loop { |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 755 | let events = { |
| 756 | match poll_ctx.wait() { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 757 | Ok(v) => v, |
| 758 | Err(e) => { |
| 759 | error!("failed to poll: {:?}", e); |
| 760 | break; |
| 761 | } |
| 762 | } |
| 763 | }; |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 764 | for event in events.iter_readable() { |
| 765 | match event.token() { |
| 766 | Token::Exit => { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 767 | info!("vcpu requested shutdown"); |
| 768 | break 'poll; |
| 769 | } |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 770 | Token::Stdin => { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 771 | let mut out = [0u8; 64]; |
| 772 | match stdin_lock.read_raw(&mut out[..]) { |
| 773 | Ok(0) => { |
| 774 | // Zero-length read indicates EOF. Remove from pollables. |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 775 | let _ = poll_ctx.delete(&stdin_handle); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 776 | }, |
| 777 | Err(e) => { |
| 778 | warn!("error while reading stdin: {:?}", e); |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 779 | let _ = poll_ctx.delete(&stdin_handle); |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 780 | }, |
| 781 | Ok(count) => { |
Dylan Reid | 0ed91ab | 2018-05-31 15:42:18 -0700 | [diff] [blame] | 782 | linux.stdio_serial |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 783 | .lock() |
| 784 | .unwrap() |
| 785 | .queue_input_bytes(&out[..count]) |
| 786 | .expect("failed to queue bytes into serial port"); |
| 787 | }, |
| 788 | } |
| 789 | } |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 790 | Token::ChildSignal => { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 791 | // Print all available siginfo structs, then exit the loop. |
| 792 | loop { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 793 | let result = sigchld_fd.read().map_err(Error::SignalFd)?; |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 794 | if let Some(siginfo) = result { |
| 795 | error!("child {} died: signo {}, status {}, code {}", |
| 796 | siginfo.ssi_pid, |
| 797 | siginfo.ssi_signo, |
| 798 | siginfo.ssi_status, |
| 799 | siginfo.ssi_code); |
| 800 | } |
| 801 | break 'poll; |
| 802 | } |
| 803 | } |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 804 | Token::CheckAvailableMemory => { |
| 805 | // Acknowledge the timer. |
| 806 | freemem_timer.wait().map_err(Error::TimerFd)?; |
| 807 | if current_balloon_memory == 0 { |
| 808 | // Nothing to see here. |
| 809 | if let Err(e) = freemem_timer.clear() { |
| 810 | warn!("unable to clear available memory check timer: {}", e); |
| 811 | } |
| 812 | continue; |
| 813 | } |
| 814 | |
| 815 | // Otherwise see if we can free up some memory. |
| 816 | let margin = file_to_u64(LOWMEM_MARGIN).map_err(Error::ReadLowmemMargin)?; |
| 817 | let available = file_to_u64(LOWMEM_AVAILABLE).map_err(Error::ReadLowmemAvailable)?; |
| 818 | |
| 819 | // `available` and `margin` are specified in MB while `balloon_memory_increment` is in |
| 820 | // bytes. So to correctly compare them we need to turn the increment value into MB. |
| 821 | if available >= margin + 2*(balloon_memory_increment >> 20) { |
| 822 | current_balloon_memory = if current_balloon_memory >= balloon_memory_increment { |
| 823 | current_balloon_memory - balloon_memory_increment |
| 824 | } else { |
| 825 | 0 |
| 826 | }; |
| 827 | let mut buf = [0u8; mem::size_of::<u64>()]; |
| 828 | LittleEndian::write_u64(&mut buf, current_balloon_memory); |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 829 | if let Err(e) = balloon_host_socket.send(&buf) { |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 830 | warn!("failed to send memory value to balloon device: {}", e); |
| 831 | } |
| 832 | } |
| 833 | } |
| 834 | Token::LowMemory => { |
Dylan Reid | f11e6ed | 2018-07-31 10:24:06 -0700 | [diff] [blame] | 835 | if let Some(ref low_mem) = low_mem { |
| 836 | let old_balloon_memory = current_balloon_memory; |
| 837 | current_balloon_memory = |
| 838 | min(current_balloon_memory + balloon_memory_increment, |
| 839 | max_balloon_memory); |
| 840 | if current_balloon_memory != old_balloon_memory { |
| 841 | let mut buf = [0u8; mem::size_of::<u64>()]; |
| 842 | LittleEndian::write_u64(&mut buf, current_balloon_memory); |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 843 | if let Err(e) = balloon_host_socket.send(&buf) { |
Dylan Reid | f11e6ed | 2018-07-31 10:24:06 -0700 | [diff] [blame] | 844 | warn!("failed to send memory value to balloon device: {}", e); |
| 845 | } |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 846 | } |
Dylan Reid | f11e6ed | 2018-07-31 10:24:06 -0700 | [diff] [blame] | 847 | |
| 848 | // Stop polling the lowmem device until the timer fires. |
| 849 | poll_ctx.delete(low_mem).map_err(Error::PollContextDelete)?; |
| 850 | |
| 851 | // Add some jitter to the timer so that if there are multiple VMs running |
| 852 | // they don't all start ballooning at exactly the same time. |
| 853 | let lowmem_dur = |
| 854 | Duration::from_millis(1000 + lowmem_jitter_ms.ind_sample(&mut rng)); |
| 855 | lowmem_timer.reset(lowmem_dur, None).map_err(Error::ResetTimerFd)?; |
| 856 | |
| 857 | // Also start a timer to check when we can start giving memory back. Do the |
| 858 | // first check after a minute (with jitter) and subsequent checks after |
| 859 | // every 30 seconds (with jitter). |
| 860 | let freemem_dur = |
| 861 | Duration::from_secs(60 + freemem_jitter_secs.ind_sample(&mut rng)); |
| 862 | let freemem_int = |
| 863 | Duration::from_secs(30 + interval_jitter_secs.ind_sample(&mut rng)); |
| 864 | freemem_timer |
| 865 | .reset(freemem_dur, Some(freemem_int)) |
| 866 | .map_err(Error::ResetTimerFd)?; |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 867 | } |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 868 | } |
| 869 | Token::LowmemTimer => { |
| 870 | // Acknowledge the timer. |
| 871 | lowmem_timer.wait().map_err(Error::TimerFd)?; |
| 872 | |
Dylan Reid | f11e6ed | 2018-07-31 10:24:06 -0700 | [diff] [blame] | 873 | if let Some(ref low_mem) = low_mem { |
| 874 | // Start polling the lowmem device again. |
| 875 | poll_ctx.add(low_mem, Token::LowMemory).map_err(Error::PollContextAdd)?; |
| 876 | } |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 877 | } |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 878 | Token::VmControl { index } => { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 879 | if let Some(socket) = control_sockets.get(index as usize) { |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 880 | match VmRequest::recv(&mut scm, socket.as_ref()) { |
| 881 | Ok(request) => { |
| 882 | let mut running = true; |
| 883 | let response = |
Dylan Reid | 0ed91ab | 2018-05-31 15:42:18 -0700 | [diff] [blame] | 884 | request.execute(&mut linux.vm, |
| 885 | &mut linux.resources, |
David Reveman | 52ba4e5 | 2018-04-22 21:42:09 -0400 | [diff] [blame] | 886 | &mut running, |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 887 | &balloon_host_socket); |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 888 | if let Err(e) = response.send(&mut scm, socket.as_ref()) { |
| 889 | error!("failed to send VmResponse: {:?}", e); |
| 890 | } |
| 891 | if !running { |
| 892 | info!("control socket requested exit"); |
| 893 | break 'poll; |
| 894 | } |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 895 | } |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 896 | Err(e) => error!("failed to recv VmRequest: {:?}", e), |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 897 | } |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 898 | } |
| 899 | } |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 900 | } |
| 901 | } |
| 902 | for event in events.iter_hungup() { |
| 903 | // It's possible more data is readable and buffered while the socket is hungup, so |
| 904 | // don't delete the socket from the poll context until we're sure all the data is |
| 905 | // read. |
| 906 | if !event.readable() { |
| 907 | match event.token() { |
| 908 | Token::Exit => {}, |
| 909 | Token::Stdin => { |
| 910 | let _ = poll_ctx.delete(&stdin_handle); |
| 911 | }, |
| 912 | Token::ChildSignal => {}, |
Chirantan Ekbote | 448516e | 2018-07-24 16:07:42 -0700 | [diff] [blame] | 913 | Token::CheckAvailableMemory => {}, |
| 914 | Token::LowMemory => {}, |
| 915 | Token::LowmemTimer => {}, |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 916 | Token::VmControl { index } => { |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 917 | if let Some(socket) = control_sockets.get(index as usize) { |
Zach Reizner | 5bed0d2 | 2018-03-28 02:31:11 -0700 | [diff] [blame] | 918 | let _ = poll_ctx.delete(socket.as_ref()); |
| 919 | } |
| 920 | }, |
| 921 | } |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 922 | } |
| 923 | } |
| 924 | } |
| 925 | |
| 926 | // vcpu threads MUST see the kill signaled flag, otherwise they may |
| 927 | // re-enter the VM. |
Dylan Reid | 059a188 | 2018-07-23 17:58:09 -0700 | [diff] [blame^] | 928 | kill_signaled.store(true, Ordering::SeqCst); |
| 929 | for handle in vcpu_handles { |
Dmitry Torokhov | cd40533 | 2018-02-16 16:25:54 -0800 | [diff] [blame] | 930 | match handle.kill(SIGRTMIN() + 0) { |
Zach Reizner | 39aa26b | 2017-12-12 18:03:23 -0800 | [diff] [blame] | 931 | Ok(_) => { |
| 932 | if let Err(e) = handle.join() { |
| 933 | error!("failed to join vcpu thread: {:?}", e); |
| 934 | } |
| 935 | } |
| 936 | Err(e) => error!("failed to kill vcpu thread: {:?}", e), |
| 937 | } |
| 938 | } |
| 939 | |
| 940 | stdin_lock |
| 941 | .set_canon_mode() |
| 942 | .expect("failed to restore canonical mode for terminal"); |
| 943 | |
| 944 | Ok(()) |
| 945 | } |