blob: b1d27321357491fa025e459d8824d9e65fe6c353 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use std;
6use std::ffi::{CString, CStr};
7use std::fmt;
Sonny Raoed517d12018-02-13 22:09:43 -08008use std::error;
Zach Reizner39aa26b2017-12-12 18:03:23 -08009use std::fs::{File, OpenOptions, remove_file};
Sonny Raoed517d12018-02-13 22:09:43 -080010use std::io::{self, stdin};
Zach Reizner39aa26b2017-12-12 18:03:23 -080011use std::os::unix::net::UnixDatagram;
12use std::path::{Path, PathBuf};
Zach Reizner39aa26b2017-12-12 18:03:23 -080013use std::sync::atomic::{AtomicBool, Ordering};
14use std::sync::{Arc, Mutex, Barrier};
15use std::thread;
16use std::thread::JoinHandle;
17
18use libc;
19
Zach Reizner8fb52112017-12-13 16:04:39 -080020use device_manager;
Zach Reizner39aa26b2017-12-12 18:03:23 -080021use devices;
22use io_jail::{self, Minijail};
23use kernel_cmdline;
Zach Reizner39aa26b2017-12-12 18:03:23 -080024use kvm::*;
Jason D. Clinton865323d2017-09-27 22:04:03 -060025use net_util::Tap;
Dylan Reid88624f82018-01-11 09:20:16 -080026use qcow::{self, QcowFile};
Zach Reizner39aa26b2017-12-12 18:03:23 -080027use sys_util::*;
28use sys_util;
Jason D. Clinton865323d2017-09-27 22:04:03 -060029use vhost;
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -080030use vm_control::VmRequest;
Zach Reizner39aa26b2017-12-12 18:03:23 -080031
32use Config;
Dylan Reid88624f82018-01-11 09:20:16 -080033use DiskType;
Zach Reizner39aa26b2017-12-12 18:03:23 -080034
Sonny Raoed517d12018-02-13 22:09:43 -080035use arch::LinuxArch;
36
37#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
38use x86_64::X8664arch as Arch;
Sonny Rao2ffa0cb2018-02-26 17:27:40 -080039#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
40use aarch64::AArch64 as Arch;
Zach Reizner39aa26b2017-12-12 18:03:23 -080041
42pub enum Error {
Dylan Reid295ccac2017-11-06 14:06:24 -080043 BalloonDeviceNew(devices::virtio::BalloonError),
Zach Reizner39aa26b2017-12-12 18:03:23 -080044 BlockDeviceNew(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080045 ChownWaylandRoot(sys_util::Error),
46 CloneEventFd(sys_util::Error),
47 Cmdline(kernel_cmdline::Error),
48 CreateEventFd(sys_util::Error),
Sonny Raoed517d12018-02-13 22:09:43 -080049 CreateGuestMemory(Box<error::Error>),
50 CreateIrqChip(Box<error::Error>),
Zach Reizner8fb52112017-12-13 16:04:39 -080051 CreateKvm(sys_util::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070052 CreatePollContext(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080053 CreateSignalFd(sys_util::SignalFdError),
54 CreateSocket(io::Error),
55 CreateVcpu(sys_util::Error),
Sonny Raoed517d12018-02-13 22:09:43 -080056 CreateVm(Box<error::Error>),
Zach Reizner39aa26b2017-12-12 18:03:23 -080057 DeviceJail(io_jail::Error),
58 DevicePivotRoot(io_jail::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080059 Disk(io::Error),
Stephen Barberc79de2d2018-02-21 14:17:27 -080060 DiskImageLock(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080061 GetWaylandGroup(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080062 NetDeviceNew(devices::virtio::NetError),
63 NoVarEmpty,
64 OpenKernel(PathBuf, io::Error),
Zach Reizner5bed0d22018-03-28 02:31:11 -070065 PollContextAdd(sys_util::Error),
Dylan Reid88624f82018-01-11 09:20:16 -080066 QcowDeviceCreate(qcow::Error),
Dylan Reid295ccac2017-11-06 14:06:24 -080067 RegisterBalloon(device_manager::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -080068 RegisterBlock(device_manager::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -080069 RegisterIrqfd(sys_util::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080070 RegisterNet(device_manager::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -080071 RegisterRng(device_manager::Error),
Zach Reizner8fb52112017-12-13 16:04:39 -080072 RegisterVsock(device_manager::Error),
73 RegisterWayland(device_manager::Error),
Zach Reizner39aa26b2017-12-12 18:03:23 -080074 RngDeviceNew(devices::virtio::RngError),
Zach Reizner8fb52112017-12-13 16:04:39 -080075 SettingGidMap(io_jail::Error),
76 SettingUidMap(io_jail::Error),
77 SetTssAddr(sys_util::Error),
78 SignalFd(sys_util::SignalFdError),
79 SpawnVcpu(io::Error),
80 VhostNetDeviceNew(devices::virtio::vhost::Error),
81 VhostVsockDeviceNew(devices::virtio::vhost::Error),
82 WaylandDeviceNew(sys_util::Error),
83 WaylandTempDir(sys_util::Error),
Sonny Raoed517d12018-02-13 22:09:43 -080084 SetupSystemMemory(Box<error::Error>),
Sonny Raoed517d12018-02-13 22:09:43 -080085 ConfigureVcpu(Box<error::Error>),
Sonny Raoed517d12018-02-13 22:09:43 -080086 LoadKernel(Box<error::Error>),
Sonny Raoed517d12018-02-13 22:09:43 -080087 SetupIoBus(Box<error::Error>),
Sonny Raoed517d12018-02-13 22:09:43 -080088 SetupMMIOBus(Box<error::Error>),
Zach Reizner39aa26b2017-12-12 18:03:23 -080089}
90
91impl fmt::Display for Error {
92 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
93 match self {
Dylan Reid295ccac2017-11-06 14:06:24 -080094 &Error::BalloonDeviceNew(ref e) => write!(f, "failed to create balloon: {:?}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -080095 &Error::BlockDeviceNew(ref e) => write!(f, "failed to create block device: {:?}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -080096 &Error::ChownWaylandRoot(ref e) => {
97 write!(f, "error chowning wayland root directory: {:?}", e)
98 }
99 &Error::CloneEventFd(ref e) => write!(f, "failed to clone eventfd: {:?}", e),
100 &Error::Cmdline(ref e) => write!(f, "the given kernel command line was invalid: {}", e),
101 &Error::CreateEventFd(ref e) => write!(f, "failed to create eventfd: {:?}", e),
102 &Error::CreateGuestMemory(ref e) => write!(f, "failed to create guest memory: {:?}", e),
103 &Error::CreateIrqChip(ref e) => {
104 write!(f, "failed to create in-kernel IRQ chip: {:?}", e)
105 }
106 &Error::CreateKvm(ref e) => write!(f, "failed to open /dev/kvm: {:?}", e),
Zach Reizner5bed0d22018-03-28 02:31:11 -0700107 &Error::CreatePollContext(ref e) => write!(f, "failed to create poll context: {:?}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -0800108 &Error::CreateSignalFd(ref e) => write!(f, "failed to create signalfd: {:?}", e),
109 &Error::CreateSocket(ref e) => write!(f, "failed to create socket: {}", e),
110 &Error::CreateVcpu(ref e) => write!(f, "failed to create VCPU: {:?}", e),
111 &Error::CreateVm(ref e) => write!(f, "failed to create KVM VM object: {:?}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800112 &Error::DeviceJail(ref e) => write!(f, "failed to jail device: {}", e),
113 &Error::DevicePivotRoot(ref e) => write!(f, "failed to pivot root device: {}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -0800114 &Error::Disk(ref e) => write!(f, "failed to load disk image: {}", e),
Stephen Barberc79de2d2018-02-21 14:17:27 -0800115 &Error::DiskImageLock(ref e) => write!(f, "failed to lock disk image: {:?}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -0800116 &Error::GetWaylandGroup(ref e) => {
117 write!(f, "could not find gid for wayland group: {:?}", e)
118 }
Zach Reizner8fb52112017-12-13 16:04:39 -0800119 &Error::NetDeviceNew(ref e) => write!(f, "failed to set up virtio networking: {:?}", e),
120 &Error::NoVarEmpty => write!(f, "/var/empty doesn't exist, can't jail devices."),
121 &Error::OpenKernel(ref p, ref e) => {
122 write!(f, "failed to open kernel image {:?}: {}", p, e)
123 }
Zach Reizner5bed0d22018-03-28 02:31:11 -0700124 &Error::PollContextAdd(ref e) => write!(f, "failed to add fd to poll context: {:?}", e),
Dylan Reid88624f82018-01-11 09:20:16 -0800125 &Error::QcowDeviceCreate(ref e) => {
126 write!(f, "failed to read qcow formatted file {:?}", e)
127 }
Dylan Reid295ccac2017-11-06 14:06:24 -0800128 &Error::RegisterBalloon(ref e) => {
129 write!(f, "error registering balloon device: {:?}", e)
130 },
Zach Reizner8fb52112017-12-13 16:04:39 -0800131 &Error::RegisterBlock(ref e) => write!(f, "error registering block device: {:?}", e),
132 &Error::RegisterIrqfd(ref e) => write!(f, "error registering irqfd: {:?}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800133 &Error::RegisterNet(ref e) => write!(f, "error registering net device: {:?}", e),
134 &Error::RegisterRng(ref e) => write!(f, "error registering rng device: {:?}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -0800135 &Error::RegisterVsock(ref e) => {
136 write!(f, "error registering virtual socket device: {:?}", e)
137 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800138 &Error::RegisterWayland(ref e) => write!(f, "error registering wayland device: {}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -0800139 &Error::RngDeviceNew(ref e) => write!(f, "failed to set up rng: {:?}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800140 &Error::SettingGidMap(ref e) => write!(f, "error setting GID map: {}", e),
Zach Reizner8fb52112017-12-13 16:04:39 -0800141 &Error::SettingUidMap(ref e) => write!(f, "error setting UID map: {}", e),
142 &Error::SetTssAddr(ref e) => write!(f, "failed to set TSS address: {:?}", e),
143 &Error::SignalFd(ref e) => write!(f, "failed to read signal fd: {:?}", e),
144 &Error::SpawnVcpu(ref e) => write!(f, "failed to spawn VCPU thread: {:?}", e),
145 &Error::VhostNetDeviceNew(ref e) => {
146 write!(f, "failed to set up vhost networking: {:?}", e)
147 }
148 &Error::VhostVsockDeviceNew(ref e) => {
149 write!(f, "failed to set up virtual socket device: {:?}", e)
150 }
151 &Error::WaylandDeviceNew(ref e) => {
152 write!(f, "failed to create wayland device: {:?}", e)
153 }
154 &Error::WaylandTempDir(ref e) => {
155 write!(f, "failed to create wayland device jail directroy: {:?}", e)
156 }
Sonny Raoed517d12018-02-13 22:09:43 -0800157 &Error::SetupSystemMemory(ref e) => write!(f, "error setting up system memory: {}", e),
Sonny Raoed517d12018-02-13 22:09:43 -0800158 &Error::ConfigureVcpu(ref e) => write!(f, "failed to configure vcpu: {}", e),
Sonny Raoed517d12018-02-13 22:09:43 -0800159 &Error::LoadKernel(ref e) => write!(f, "failed to load kernel: {}", e),
Sonny Raoed517d12018-02-13 22:09:43 -0800160 &Error::SetupIoBus(ref e) => write!(f, "failed to setup iobus: {}", e),
Sonny Raoed517d12018-02-13 22:09:43 -0800161 &Error::SetupMMIOBus(ref e) => write!(f, "failed to setup mmio bus: {}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800162 }
163 }
164}
165
166type Result<T> = std::result::Result<T, Error>;
167
168struct UnlinkUnixDatagram(UnixDatagram);
169impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
170 fn as_ref(&self) -> &UnixDatagram{
171 &self.0
172 }
173}
174impl Drop for UnlinkUnixDatagram {
175 fn drop(&mut self) {
176 if let Ok(addr) = self.0.local_addr() {
177 if let Some(path) = addr.as_pathname() {
178 if let Err(e) = remove_file(path) {
179 warn!("failed to remove control socket file: {:?}", e);
180 }
181 }
182 }
183 }
184}
185
Zach Reizner39aa26b2017-12-12 18:03:23 -0800186fn create_base_minijail(root: &Path, seccomp_policy: &Path) -> Result<Minijail> {
187 // All child jails run in a new user namespace without any users mapped,
188 // they run as nobody unless otherwise configured.
189 let mut j = Minijail::new().map_err(|e| Error::DeviceJail(e))?;
190 j.namespace_pids();
191 j.namespace_user();
192 j.namespace_user_disable_setgroups();
193 // Don't need any capabilities.
194 j.use_caps(0);
195 // Create a new mount namespace with an empty root FS.
196 j.namespace_vfs();
197 j.enter_pivot_root(root)
198 .map_err(|e| Error::DevicePivotRoot(e))?;
199 // Run in an empty network namespace.
200 j.namespace_net();
201 // Apply the block device seccomp policy.
202 j.no_new_privs();
Stephen Barber3b1d8a52018-01-06 17:34:51 -0800203 // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP, which will correctly kill
204 // the entire device process if a worker thread commits a seccomp violation.
205 j.set_seccomp_filter_tsync();
Zach Reizner043ddc52018-04-03 20:47:21 -0700206 #[cfg(debug_assertions)]
207 j.log_seccomp_filter_failures();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800208 j.parse_seccomp_filters(seccomp_policy)
209 .map_err(|e| Error::DeviceJail(e))?;
210 j.use_seccomp_filter();
211 // Don't do init setup.
212 j.run_as_init();
213 Ok(j)
214}
215
Zach Reizner8fb52112017-12-13 16:04:39 -0800216fn setup_mmio_bus(cfg: &Config,
217 vm: &mut Vm,
218 mem: &GuestMemory,
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800219 cmdline: &mut kernel_cmdline::Cmdline,
220 control_sockets: &mut Vec<UnlinkUnixDatagram>,
221 balloon_device_socket: UnixDatagram)
222 -> Result<devices::Bus> {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800223 static DEFAULT_PIVOT_ROOT: &'static str = "/var/empty";
Sonny Raoed517d12018-02-13 22:09:43 -0800224 let mut device_manager = Arch::get_device_manager(vm, mem.clone()).
225 map_err(|e| Error::SetupMMIOBus(e))?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800226
227 // An empty directory for jailed device's pivot root.
228 let empty_root_path = Path::new(DEFAULT_PIVOT_ROOT);
229 if cfg.multiprocess && !empty_root_path.exists() {
230 return Err(Error::NoVarEmpty);
231 }
232
Zach Reizner8fb52112017-12-13 16:04:39 -0800233 for disk in &cfg.disks {
Dylan Reid88624f82018-01-11 09:20:16 -0800234 let mut raw_image = OpenOptions::new()
Zach Reizner39aa26b2017-12-12 18:03:23 -0800235 .read(true)
236 .write(disk.writable)
Zach Reizner8fb52112017-12-13 16:04:39 -0800237 .open(&disk.path)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800238 .map_err(|e| Error::Disk(e))?;
Stephen Barberc79de2d2018-02-21 14:17:27 -0800239 // Lock the disk image to prevent other crosvm instances from using it.
240 let lock_op = if disk.writable {
241 FlockOperation::LockExclusive
242 } else {
243 FlockOperation::LockShared
244 };
245 flock(&raw_image, lock_op, true).map_err(Error::DiskImageLock)?;
246
Dylan Reid88624f82018-01-11 09:20:16 -0800247 let block_box: Box<devices::virtio::VirtioDevice> = match disk.disk_type {
248 DiskType::FlatFile => { // Access as a raw block device.
249 Box::new(devices::virtio::Block::new(raw_image)
250 .map_err(|e| Error::BlockDeviceNew(e))?)
251 }
252 DiskType::Qcow => { // Valid qcow header present
253 let qcow_image = QcowFile::from(raw_image)
254 .map_err(|e| Error::QcowDeviceCreate(e))?;
255 Box::new(devices::virtio::Block::new(qcow_image)
256 .map_err(|e| Error::BlockDeviceNew(e))?)
257 }
258 };
Zach Reizner39aa26b2017-12-12 18:03:23 -0800259 let jail = if cfg.multiprocess {
260 let policy_path: PathBuf = cfg.seccomp_policy_dir.join("block_device.policy");
261 Some(create_base_minijail(empty_root_path, &policy_path)?)
262 }
263 else {
264 None
265 };
266
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800267 device_manager
268 .register_mmio(block_box, jail, cmdline)
269 .map_err(Error::RegisterBlock)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800270 }
271
272 let rng_box = Box::new(devices::virtio::Rng::new().map_err(Error::RngDeviceNew)?);
273 let rng_jail = if cfg.multiprocess {
274 let policy_path: PathBuf = cfg.seccomp_policy_dir.join("rng_device.policy");
275 Some(create_base_minijail(empty_root_path, &policy_path)?)
276 } else {
277 None
278 };
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800279 device_manager
280 .register_mmio(rng_box, rng_jail, cmdline)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800281 .map_err(Error::RegisterRng)?;
282
Dylan Reid295ccac2017-11-06 14:06:24 -0800283 let balloon_box = Box::new(devices::virtio::Balloon::new(balloon_device_socket)
284 .map_err(Error::BalloonDeviceNew)?);
285 let balloon_jail = if cfg.multiprocess {
286 let policy_path: PathBuf = cfg.seccomp_policy_dir.join("balloon_device.policy");
287 Some(create_base_minijail(empty_root_path, &policy_path)?)
288 } else {
289 None
290 };
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800291 device_manager.register_mmio(balloon_box, balloon_jail, cmdline)
Dylan Reid295ccac2017-11-06 14:06:24 -0800292 .map_err(Error::RegisterBalloon)?;
293
Zach Reizner39aa26b2017-12-12 18:03:23 -0800294 // We checked above that if the IP is defined, then the netmask is, too.
295 if let Some(host_ip) = cfg.host_ip {
296 if let Some(netmask) = cfg.netmask {
Stephen Barber308ff602018-02-13 22:47:07 -0800297 if let Some(mac_address) = cfg.mac_address {
298 let net_box: Box<devices::virtio::VirtioDevice> = if cfg.vhost_net {
299 Box::new(devices::virtio::vhost::Net::<Tap, vhost::Net<Tap>>::new(host_ip,
300 netmask,
301 mac_address,
302 &mem)
303 .map_err(|e| Error::VhostNetDeviceNew(e))?)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800304 } else {
Stephen Barber308ff602018-02-13 22:47:07 -0800305 Box::new(devices::virtio::Net::<Tap>::new(host_ip, netmask, mac_address)
306 .map_err(|e| Error::NetDeviceNew(e))?)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800307 };
308
Stephen Barber308ff602018-02-13 22:47:07 -0800309 let jail = if cfg.multiprocess {
310 let policy_path: PathBuf = if cfg.vhost_net {
311 cfg.seccomp_policy_dir.join("vhost_net_device.policy")
312 } else {
313 cfg.seccomp_policy_dir.join("net_device.policy")
314 };
Zach Reizner39aa26b2017-12-12 18:03:23 -0800315
Stephen Barber308ff602018-02-13 22:47:07 -0800316 Some(create_base_minijail(empty_root_path, &policy_path)?)
317 } else {
318 None
319 };
320
321 device_manager
322 .register_mmio(net_box, jail, cmdline)
323 .map_err(Error::RegisterNet)?;
324 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800325 }
326 }
327
Zach Reizner8fb52112017-12-13 16:04:39 -0800328 if let Some(wayland_socket_path) = cfg.wayland_socket_path.as_ref() {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800329 let jailed_wayland_path = Path::new("/wayland-0");
330
Zach Reizner8fb52112017-12-13 16:04:39 -0800331 let (host_socket, device_socket) = UnixDatagram::pair().map_err(Error::CreateSocket)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800332 control_sockets.push(UnlinkUnixDatagram(host_socket));
333 let wl_box = Box::new(devices::virtio::Wl::new(if cfg.multiprocess {
Zach Reizner8fb52112017-12-13 16:04:39 -0800334 &jailed_wayland_path
335 } else {
336 wayland_socket_path.as_path()
337 },
338 device_socket)
339 .map_err(Error::WaylandDeviceNew)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800340
341 let jail = if cfg.multiprocess {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800342 let policy_path: PathBuf = cfg.seccomp_policy_dir.join("wl_device.policy");
Chirantan Ekbote293c61c2018-01-04 16:19:17 -0800343 let mut jail = create_base_minijail(empty_root_path, &policy_path)?;
344
345 // Create a tmpfs in the device's root directory so that we can bind mount the
346 // wayland socket into it. The size=67108864 is size=64*1024*1024 or size=64MB.
347 jail.mount_with_data(Path::new("none"), Path::new("/"), "tmpfs",
348 (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
349 "size=67108864")
350 .unwrap();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800351
352 // Bind mount the wayland socket into jail's root. This is necessary since each
353 // new wayland context must open() the socket.
354 jail.mount_bind(wayland_socket_path.as_path(), jailed_wayland_path, true)
355 .unwrap();
356
357 // Set the uid/gid for the jailed process, and give a basic id map. This
358 // is required for the above bind mount to work.
Zach Reizner39aa26b2017-12-12 18:03:23 -0800359 let crosvm_user_group = CStr::from_bytes_with_nul(b"crosvm\0").unwrap();
360 let crosvm_uid = match get_user_id(&crosvm_user_group) {
361 Ok(u) => u,
362 Err(e) => {
363 warn!("falling back to current user id for Wayland: {:?}", e);
364 geteuid()
365 }
366 };
Chirantan Ekbote0ba70d82018-01-24 13:47:58 -0800367 let crosvm_gid = match get_group_id(&crosvm_user_group) {
368 Ok(u) => u,
369 Err(e) => {
370 warn!("falling back to current group id for Wayland: {:?}", e);
371 getegid()
372 }
373 };
Zach Reizner39aa26b2017-12-12 18:03:23 -0800374 jail.change_uid(crosvm_uid);
Chirantan Ekbote0ba70d82018-01-24 13:47:58 -0800375 jail.change_gid(crosvm_gid);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800376 jail.uidmap(&format!("{0} {0} 1", crosvm_uid))
377 .map_err(Error::SettingUidMap)?;
Chirantan Ekbote0ba70d82018-01-24 13:47:58 -0800378 jail.gidmap(&format!("{0} {0} 1", crosvm_gid))
Zach Reizner39aa26b2017-12-12 18:03:23 -0800379 .map_err(Error::SettingGidMap)?;
380
Zach Reizner39aa26b2017-12-12 18:03:23 -0800381 Some(jail)
382 } else {
383 None
384 };
385 device_manager
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800386 .register_mmio(wl_box, jail, cmdline)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800387 .map_err(Error::RegisterWayland)?;
388 }
389
390 if let Some(cid) = cfg.cid {
Zach Reizner8fb52112017-12-13 16:04:39 -0800391 let vsock_box = Box::new(devices::virtio::vhost::Vsock::new(cid, &mem)
392 .map_err(Error::VhostVsockDeviceNew)?);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800393
394 let jail = if cfg.multiprocess {
395 let policy_path: PathBuf = cfg.seccomp_policy_dir.join("vhost_vsock_device.policy");
396
397 Some(create_base_minijail(empty_root_path, &policy_path)?)
398 } else {
399 None
400 };
401
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800402 device_manager
403 .register_mmio(vsock_box, jail, cmdline)
404 .map_err(Error::RegisterVsock)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800405 }
406
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800407 Ok(device_manager.bus)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800408}
409
Zach Reiznerd3a7a1f2017-12-12 20:19:25 -0800410
Zach Reizner8fb52112017-12-13 16:04:39 -0800411fn setup_vcpu(kvm: &Kvm,
412 vm: &Vm,
413 cpu_id: u32,
Sonny Raobb7da422018-02-13 20:37:48 -0800414 vcpu_count: u32)
415 -> Result<Vcpu> {
Zach Reizner8fb52112017-12-13 16:04:39 -0800416 let vcpu = Vcpu::new(cpu_id as libc::c_ulong, &kvm, &vm)
417 .map_err(Error::CreateVcpu)?;
Sonny Raoa7fae252018-03-27 16:30:51 -0700418 Arch::configure_vcpu(vm.get_memory(), &kvm, &vm, &vcpu, cpu_id as u64, vcpu_count as u64).
Sonny Raoed517d12018-02-13 22:09:43 -0800419 map_err(Error::ConfigureVcpu)?;
Sonny Raobb7da422018-02-13 20:37:48 -0800420 Ok(vcpu)
421}
422
423fn run_vcpu(vcpu: Vcpu,
424 cpu_id: u32,
425 start_barrier: Arc<Barrier>,
426 io_bus: devices::Bus,
427 mmio_bus: devices::Bus,
428 exit_evt: EventFd,
429 kill_signaled: Arc<AtomicBool>) -> Result<JoinHandle<()>> {
Zach Reizner8fb52112017-12-13 16:04:39 -0800430 thread::Builder::new()
431 .name(format!("crosvm_vcpu{}", cpu_id))
432 .spawn(move || {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800433 unsafe {
434 extern "C" fn handle_signal() {}
435 // Our signal handler does nothing and is trivially async signal safe.
Dmitry Torokhovcd405332018-02-16 16:25:54 -0800436 register_signal_handler(SIGRTMIN() + 0, handle_signal)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800437 .expect("failed to register vcpu signal handler");
438 }
439
Zach Reizner8fb52112017-12-13 16:04:39 -0800440 start_barrier.wait();
Zach Reizner39aa26b2017-12-12 18:03:23 -0800441 loop {
442 let run_res = vcpu.run();
443 match run_res {
444 Ok(run) => {
445 match run {
446 VcpuExit::IoIn(addr, data) => {
447 io_bus.read(addr as u64, data);
448 }
449 VcpuExit::IoOut(addr, data) => {
450 io_bus.write(addr as u64, data);
451 }
452 VcpuExit::MmioRead(addr, data) => {
453 mmio_bus.read(addr, data);
454 }
455 VcpuExit::MmioWrite(addr, data) => {
456 mmio_bus.write(addr, data);
457 }
458 VcpuExit::Hlt => break,
459 VcpuExit::Shutdown => break,
Sonny Rao6ce158f2018-03-27 17:12:58 -0700460 VcpuExit::SystemEvent(_, _) =>
461 //TODO handle reboot and crash events
462 kill_signaled.store(true, Ordering::SeqCst),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800463 r => warn!("unexpected vcpu exit: {:?}", r),
464 }
465 }
466 Err(e) => {
467 match e.errno() {
468 libc::EAGAIN | libc::EINTR => {},
469 _ => {
470 error!("vcpu hit unknown error: {:?}", e);
471 break;
472 }
473 }
474 }
475 }
476 if kill_signaled.load(Ordering::SeqCst) {
477 break;
478 }
479 }
Zach Reizner8fb52112017-12-13 16:04:39 -0800480 exit_evt
Zach Reizner39aa26b2017-12-12 18:03:23 -0800481 .write(1)
482 .expect("failed to signal vcpu exit eventfd");
Zach Reizner8fb52112017-12-13 16:04:39 -0800483 })
484 .map_err(Error::SpawnVcpu)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800485}
486
Zach Reizner8fb52112017-12-13 16:04:39 -0800487fn run_control(vm: &mut Vm,
Zach Reizner39aa26b2017-12-12 18:03:23 -0800488 control_sockets: Vec<UnlinkUnixDatagram>,
Zach Reizner8fb52112017-12-13 16:04:39 -0800489 next_dev_pfn: &mut u64,
Zach Reizner39aa26b2017-12-12 18:03:23 -0800490 stdio_serial: Arc<Mutex<devices::Serial>>,
491 exit_evt: EventFd,
492 sigchld_fd: SignalFd,
493 kill_signaled: Arc<AtomicBool>,
Dylan Reidd4432042017-12-06 18:20:09 -0800494 vcpu_handles: Vec<JoinHandle<()>>,
Sonny Raoed517d12018-02-13 22:09:43 -0800495 balloon_host_socket: UnixDatagram,
496 _irqchip_fd: Option<File>)
Zach Reizner39aa26b2017-12-12 18:03:23 -0800497 -> Result<()> {
498 const MAX_VM_FD_RECV: usize = 1;
499
Zach Reizner5bed0d22018-03-28 02:31:11 -0700500 #[derive(PollToken)]
501 enum Token {
502 Exit,
503 Stdin,
504 ChildSignal,
505 VmControl { index: usize },
506 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800507
508 let stdin_handle = stdin();
509 let stdin_lock = stdin_handle.lock();
510 stdin_lock
511 .set_raw_mode()
512 .expect("failed to set terminal raw mode");
513
Zach Reizner5bed0d22018-03-28 02:31:11 -0700514 let poll_ctx = PollContext::new().map_err(Error::CreatePollContext)?;
515 poll_ctx.add(&exit_evt, Token::Exit).map_err(Error::PollContextAdd)?;
516 if let Err(e) = poll_ctx.add(&stdin_handle, Token::Stdin) {
517 warn!("failed to add stdin to poll context: {:?}", e);
518 }
519 poll_ctx.add(&sigchld_fd, Token::ChildSignal).map_err(Error::PollContextAdd)?;
520 for (index, socket) in control_sockets.iter().enumerate() {
521 poll_ctx.add(socket.as_ref(), Token::VmControl{ index }).map_err(Error::PollContextAdd)?;
Zach Reizner39aa26b2017-12-12 18:03:23 -0800522 }
523
Zach Reizner39aa26b2017-12-12 18:03:23 -0800524 let mut scm = Scm::new(MAX_VM_FD_RECV);
525
526 'poll: loop {
Zach Reizner5bed0d22018-03-28 02:31:11 -0700527 let events = {
528 match poll_ctx.wait() {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800529 Ok(v) => v,
530 Err(e) => {
531 error!("failed to poll: {:?}", e);
532 break;
533 }
534 }
535 };
Zach Reizner5bed0d22018-03-28 02:31:11 -0700536 for event in events.iter_readable() {
537 match event.token() {
538 Token::Exit => {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800539 info!("vcpu requested shutdown");
540 break 'poll;
541 }
Zach Reizner5bed0d22018-03-28 02:31:11 -0700542 Token::Stdin => {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800543 let mut out = [0u8; 64];
544 match stdin_lock.read_raw(&mut out[..]) {
545 Ok(0) => {
546 // Zero-length read indicates EOF. Remove from pollables.
Zach Reizner5bed0d22018-03-28 02:31:11 -0700547 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800548 },
549 Err(e) => {
550 warn!("error while reading stdin: {:?}", e);
Zach Reizner5bed0d22018-03-28 02:31:11 -0700551 let _ = poll_ctx.delete(&stdin_handle);
Zach Reizner39aa26b2017-12-12 18:03:23 -0800552 },
553 Ok(count) => {
554 stdio_serial
555 .lock()
556 .unwrap()
557 .queue_input_bytes(&out[..count])
558 .expect("failed to queue bytes into serial port");
559 },
560 }
561 }
Zach Reizner5bed0d22018-03-28 02:31:11 -0700562 Token::ChildSignal => {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800563 // Print all available siginfo structs, then exit the loop.
564 loop {
565 let result = sigchld_fd.read().map_err(Error::SignalFd)?;
566 if let Some(siginfo) = result {
567 error!("child {} died: signo {}, status {}, code {}",
568 siginfo.ssi_pid,
569 siginfo.ssi_signo,
570 siginfo.ssi_status,
571 siginfo.ssi_code);
572 }
573 break 'poll;
574 }
575 }
Zach Reizner5bed0d22018-03-28 02:31:11 -0700576 Token::VmControl { index } => {
577 if let Some(socket) = control_sockets.get(index as usize) {
578 match VmRequest::recv(&mut scm, socket.as_ref()) {
579 Ok(request) => {
580 let mut running = true;
581 let response =
582 request.execute(vm, next_dev_pfn,
583 &mut running, &balloon_host_socket);
584 if let Err(e) = response.send(&mut scm, socket.as_ref()) {
585 error!("failed to send VmResponse: {:?}", e);
586 }
587 if !running {
588 info!("control socket requested exit");
589 break 'poll;
590 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800591 }
Zach Reizner5bed0d22018-03-28 02:31:11 -0700592 Err(e) => error!("failed to recv VmRequest: {:?}", e),
Zach Reizner39aa26b2017-12-12 18:03:23 -0800593 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800594 }
595 }
Zach Reizner5bed0d22018-03-28 02:31:11 -0700596 }
597 }
598 for event in events.iter_hungup() {
599 // It's possible more data is readable and buffered while the socket is hungup, so
600 // don't delete the socket from the poll context until we're sure all the data is
601 // read.
602 if !event.readable() {
603 match event.token() {
604 Token::Exit => {},
605 Token::Stdin => {
606 let _ = poll_ctx.delete(&stdin_handle);
607 },
608 Token::ChildSignal => {},
609 Token::VmControl { index } => {
610 if let Some(socket) = control_sockets.get(index as usize) {
611 let _ = poll_ctx.delete(socket.as_ref());
612 }
613 },
614 }
Zach Reizner39aa26b2017-12-12 18:03:23 -0800615 }
616 }
617 }
618
619 // vcpu threads MUST see the kill signaled flag, otherwise they may
620 // re-enter the VM.
621 kill_signaled.store(true, Ordering::SeqCst);
622 for handle in vcpu_handles {
Dmitry Torokhovcd405332018-02-16 16:25:54 -0800623 match handle.kill(SIGRTMIN() + 0) {
Zach Reizner39aa26b2017-12-12 18:03:23 -0800624 Ok(_) => {
625 if let Err(e) = handle.join() {
626 error!("failed to join vcpu thread: {:?}", e);
627 }
628 }
629 Err(e) => error!("failed to kill vcpu thread: {:?}", e),
630 }
631 }
632
633 stdin_lock
634 .set_canon_mode()
635 .expect("failed to restore canonical mode for terminal");
636
637 Ok(())
638}
Zach Reizner8fb52112017-12-13 16:04:39 -0800639
640pub fn run_config(cfg: Config) -> Result<()> {
641 if cfg.multiprocess {
642 // Printing something to the syslog before entering minijail so that libc's syslogger has a
643 // chance to open files necessary for its operation, like `/etc/localtime`. After jailing,
644 // access to those files will not be possible.
645 info!("crosvm entering multiprocess mode");
646 }
647
648
649 // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
650 // before any jailed devices have been spawned, so that we can catch any of them that fail very
651 // quickly.
652 let sigchld_fd = SignalFd::new(libc::SIGCHLD).map_err(Error::CreateSignalFd)?;
653
654 let mut control_sockets = Vec::new();
655 if let Some(ref path) = cfg.socket_path {
656 let path = Path::new(path);
657 let control_socket = UnixDatagram::bind(path).map_err(Error::CreateSocket)?;
658 control_sockets.push(UnlinkUnixDatagram(control_socket));
659 }
660
661 let kill_signaled = Arc::new(AtomicBool::new(false));
662 let exit_evt = EventFd::new().map_err(Error::CreateEventFd)?;
663
Slava Malyugind1c76132018-02-27 17:09:52 -0800664 let mem_size = cfg.memory.unwrap_or(256) << 20;
Sonny Raoed517d12018-02-13 22:09:43 -0800665 let mem = Arch::setup_memory(mem_size as u64).map_err(|e| Error::CreateGuestMemory(e))?;
Zach Reizner8fb52112017-12-13 16:04:39 -0800666 let kvm = Kvm::new().map_err(Error::CreateKvm)?;
Sonny Raoed517d12018-02-13 22:09:43 -0800667 let mut vm = Arch::create_vm(&kvm, mem.clone()).map_err(|e| Error::CreateVm(e))?;
Zach Reizner8fb52112017-12-13 16:04:39 -0800668
Sonny Raobb7da422018-02-13 20:37:48 -0800669 let vcpu_count = cfg.vcpu_count.unwrap_or(1);
670 let mut vcpu_handles = Vec::with_capacity(vcpu_count as usize);
671 let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count + 1) as usize));
672 let mut vcpus = Vec::with_capacity(vcpu_count as usize);
673 for cpu_id in 0..vcpu_count {
Sonny Raoed517d12018-02-13 22:09:43 -0800674 let vcpu = setup_vcpu(&kvm, &vm, cpu_id, vcpu_count)?;
Sonny Raobb7da422018-02-13 20:37:48 -0800675 vcpus.push(vcpu);
676 }
677
Sonny Raoed517d12018-02-13 22:09:43 -0800678 let irq_chip = Arch::create_irq_chip(&vm).map_err(|e| Error::CreateIrqChip(e))?;
Sonny Raoed517d12018-02-13 22:09:43 -0800679 let mut cmdline = Arch::get_base_linux_cmdline();
Sonny Raoed517d12018-02-13 22:09:43 -0800680 let mut next_dev_pfn = Arch::get_base_dev_pfn(mem_size as u64);
Sonny Raoed517d12018-02-13 22:09:43 -0800681 let (io_bus, stdio_serial) = Arch::setup_io_bus(&mut vm,
682 exit_evt.try_clone().
683 map_err(Error::CloneEventFd)?).
Sonny Rao43724a22018-02-01 15:52:58 -0800684 map_err(|e| Error::SetupIoBus(e))?;
Zach Reizner8fb52112017-12-13 16:04:39 -0800685
686 let (balloon_host_socket, balloon_device_socket) = UnixDatagram::pair()
687 .map_err(Error::CreateSocket)?;
688 let mmio_bus = setup_mmio_bus(&cfg,
689 &mut vm,
690 &mem,
691 &mut cmdline,
692 &mut control_sockets,
693 balloon_device_socket)?;
694
Zach Reiznerbb678712018-01-30 18:13:04 -0800695 for param in &cfg.params {
696 cmdline.insert_str(&param).map_err(Error::Cmdline)?;
Zach Reizner8fb52112017-12-13 16:04:39 -0800697 }
698
Sonny Raoed517d12018-02-13 22:09:43 -0800699 let mut kernel_image = File::open(cfg.kernel_path.as_path())
Zach Reizner8fb52112017-12-13 16:04:39 -0800700 .map_err(|e| Error::OpenKernel(cfg.kernel_path.clone(), e))?;
Sonny Rao43724a22018-02-01 15:52:58 -0800701
702 // separate out load_kernel from other setup to get a specific error for
703 // kernel loading
Sonny Raoed517d12018-02-13 22:09:43 -0800704 Arch::load_kernel(&mem, &mut kernel_image).map_err(|e| Error::LoadKernel(e))?;
Sonny Raoed517d12018-02-13 22:09:43 -0800705 Arch::setup_system_memory(&mem, mem_size as u64, vcpu_count,
706 &CString::new(cmdline).unwrap()).
Sonny Rao43724a22018-02-01 15:52:58 -0800707 map_err(|e| Error::SetupSystemMemory(e))?;
Zach Reizner8fb52112017-12-13 16:04:39 -0800708
Sonny Raobb7da422018-02-13 20:37:48 -0800709 for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
710 let handle = run_vcpu(vcpu,
711 cpu_id as u32,
712 vcpu_thread_barrier.clone(),
713 io_bus.clone(),
714 mmio_bus.clone(),
715 exit_evt.try_clone().map_err(Error::CloneEventFd)?,
716 kill_signaled.clone())?;
717 vcpu_handles.push(handle);
Zach Reizner8fb52112017-12-13 16:04:39 -0800718 }
719 vcpu_thread_barrier.wait();
720
721 run_control(&mut vm,
722 control_sockets,
723 &mut next_dev_pfn,
724 stdio_serial,
725 exit_evt,
726 sigchld_fd,
727 kill_signaled,
728 vcpu_handles,
Sonny Raoed517d12018-02-13 22:09:43 -0800729 balloon_host_socket,
730 irq_chip)
Zach Reizner8fb52112017-12-13 16:04:39 -0800731}