main: Allow some vcpus to be real time

Add an option to run some vcpus as real-time. With a cooperative guest,
this allows a subset of guest threads to get real-time behavior. The
guest uses cpusets to ensure that only real-time tasks run on the
real-time CPU, and the corresponding vcpu thread is real-time on the
host, leading to behavior close to that of host real-time threads.

BUG=b:142777321
TEST=crosvm --rt-cpus 1 [OPTION]...
     ps -AT -eo comm,sched,rtprio | grep crosvm_vcpu
     then check that the sched policy of vcpu1 is 2 (SCHED_RR)

Change-Id: Ic8e7a6840bdbce2c90e518458f6d0c50a65ca3f2
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2152167
Reviewed-by: Dylan Reid <dgreid@chromium.org>
Reviewed-by: Suleiman Souhlal <suleiman@chromium.org>
Tested-by: Kansho Nishida <kansho@chromium.org>
Commit-Queue: Kansho Nishida <kansho@chromium.org>
diff --git a/src/linux.rs b/src/linux.rs
index 27a1472..7ea3f69 100644
--- a/src/linux.rs
+++ b/src/linux.rs
@@ -51,9 +51,9 @@
 use base::{
     self, block_signal, clear_signal, drop_capabilities, error, flock, get_blocked_signals,
     get_group_id, get_user_id, getegid, geteuid, info, register_rt_signal_handler,
-    set_cpu_affinity, signal, validate_raw_fd, warn, EventFd, ExternalMapping, FlockOperation,
-    Killable, MemoryMappingArena, PollContext, PollToken, Protection, ScopedEvent, SignalFd,
-    Terminal, TimerFd, WatchingEvents, SIGRTMIN,
+    set_cpu_affinity, set_rt_prio_limit, set_rt_round_robin, signal, validate_raw_fd, warn,
+    EventFd, ExternalMapping, FlockOperation, Killable, MemoryMappingArena, PollContext, PollToken,
+    Protection, ScopedEvent, SignalFd, Terminal, TimerFd, WatchingEvents, SIGRTMIN,
 };
 use vm_control::{
     BalloonControlCommand, BalloonControlRequestSocket, BalloonControlResponseSocket,
@@ -1509,6 +1509,7 @@
     vm: impl VmArch<Vcpu = V>,
     irq_chip: &mut impl IrqChipArch<V>,
     vcpu_count: usize,
+    run_rt: bool,
     vcpu_affinity: Vec<usize>,
     has_bios: bool,
     use_hypervisor_signals: bool,
@@ -1551,6 +1552,15 @@
         error!("Failed to enable core scheduling: {}", e);
     }
 
+    if run_rt {
+        const DEFAULT_VCPU_RT_LEVEL: u16 = 6;
+        if let Err(e) = set_rt_prio_limit(u64::from(DEFAULT_VCPU_RT_LEVEL))
+            .and_then(|_| set_rt_round_robin(i32::from(DEFAULT_VCPU_RT_LEVEL)))
+        {
+            warn!("Failed to set vcpu to real time: {}", e);
+        }
+    }
+
     if use_hypervisor_signals {
         let mut v = get_blocked_signals().map_err(Error::GetSignalMask)?;
         v.retain(|&x| x != SIGRTMIN() + 0);
@@ -1602,6 +1612,7 @@
     vm: impl VmArch<Vcpu = V> + 'static,
     mut irq_chip: impl IrqChipArch<V> + 'static,
     vcpu_count: usize,
+    run_rt: bool,
     vcpu_affinity: Vec<usize>,
     start_barrier: Arc<Barrier>,
     has_bios: bool,
@@ -1629,6 +1640,7 @@
                 vm,
                 &mut irq_chip,
                 vcpu_count,
+                run_rt,
                 vcpu_affinity,
                 has_bios,
                 use_hypervisor_signals,
@@ -1889,6 +1901,7 @@
             .iter()
             .map(|path| SDT::from_file(path).map_err(|e| Error::OpenAcpiTable(path.clone(), e)))
             .collect::<Result<Vec<SDT>>>()?,
+        rt_cpus: cfg.rt_cpus.clone(),
     };
 
     let control_server_socket = match &cfg.socket_path {
@@ -2081,6 +2094,7 @@
             linux.vm.try_clone().map_err(Error::CloneEventFd)?,
             linux.irq_chip.try_clone().map_err(Error::CloneEventFd)?,
             linux.vcpu_count,
+            linux.rt_cpus.contains(&cpu_id),
             linux.vcpu_affinity.clone(),
             vcpu_thread_barrier.clone(),
             linux.has_bios,