// Scrape artifact from the code-browser page (git blob id), kept as a comment:
// blob: 57a3b4cd3fc54d3082ffcfeb2163b54a1e9909f9
// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use base::{error, AsRawDescriptor, Error as SysError, Event, RawDescriptor, Tube, TubeError};
use bit_field::*;
use data_model::DataInit;
use remain::sorted;
use std::convert::TryInto;
use thiserror::Error;
use vm_control::{VmIrqRequest, VmIrqResponse};
use crate::pci::{PciCapability, PciCapabilityID};
/// Upper bound on vectors a single device may expose.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
/// Byte size of one MSI-X table entry (four DWORDs: address lo/hi, data, control).
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
/// Byte size of one Pending Bit Array entry (a single QWORD).
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
/// Number of pending bits packed into each PBA entry.
pub const BITS_PER_PBA_ENTRY: usize = 64;
/// Function Mask bit of the Message Control word (bit 14).
const FUNCTION_MASK_BIT: u16 = 0x4000;
/// MSI-X Enable bit of the Message Control word (bit 15).
const MSIX_ENABLE_BIT: u16 = 0x8000;
/// Per-vector Mask bit (bit 0) of the Vector Control DWORD.
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;

/// One MSI-X table entry, mirroring the register layout the guest programs:
/// Message Address (low/high), Message Data, and Vector Control.
#[derive(Clone, Default)]
struct MsixTableEntry {
    msg_addr_lo: u32,
    msg_addr_hi: u32,
    msg_data: u32,
    vector_ctl: u32,
}

impl MsixTableEntry {
    /// Whether this vector's per-entry Mask bit is currently set.
    fn masked(&self) -> bool {
        self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT != 0
    }
}
/// Pairs the irqfd `Event` allocated for a vector with the Global System
/// Interrupt (GSI) number the VM assigned to it via `AllocateOneMsi`.
struct IrqfdGsi {
    // Event whose write triggers injection of this vector's interrupt.
    irqfd: Event,
    // GSI routed to the MSI address/data of this vector.
    gsi: u32,
}
/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // One entry per vector; guest-programmed address/data/control.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array, one bit per vector, 64 bits per element.
    pba_entries: Vec<u64>,
    // irqfd/GSI pair for each vector once it has been enabled; None until then.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // Function Mask bit of the Message Control word (masks all vectors).
    masked: bool,
    // MSI-X Enable bit of the Message Control word.
    enabled: bool,
    // Tube used to request GSI allocation and MSI routing from the VM.
    msi_device_socket: Tube,
    // Total number of vectors this device exposes.
    msix_num: u16,
    // Device identifier sent along with AllocateOneMsi requests.
    pci_id: u32,
    // Human-readable device name sent along with AllocateOneMsi requests.
    device_name: String,
}
// Errors from talking to the VM over the MSI Tube. NOTE: #[sorted] requires
// the variants to stay in alphabetical order.
#[sorted]
#[derive(Error, Debug)]
enum MsixError {
    #[error("AddMsiRoute failed: {0}")]
    AddMsiRoute(SysError),
    #[error("failed to receive AddMsiRoute response: {0}")]
    AddMsiRouteRecv(TubeError),
    #[error("failed to send AddMsiRoute request: {0}")]
    AddMsiRouteSend(TubeError),
    #[error("AllocateOneMsi failed: {0}")]
    AllocateOneMsi(SysError),
    #[error("failed to receive AllocateOneMsi response: {0}")]
    AllocateOneMsiRecv(TubeError),
    #[error("failed to send AllocateOneMsi request: {0}")]
    AllocateOneMsiSend(TubeError),
}
/// Convenience result alias for MSI-X operations.
type MsixResult<T> = std::result::Result<T, MsixError>;
/// Outcome of a write to the capability or table, so callers can react to
/// mask-state transitions.
pub enum MsixStatus {
    /// The function-wide mask state changed; all vectors are affected.
    Changed,
    /// A single table entry (identified by index) changed mask state.
    EntryChanged(usize),
    /// The write did not change any mask state.
    NothingToDo,
}
impl MsixConfig {
    /// Creates an `MsixConfig` with `msix_vectors` vectors, all initially
    /// masked (the reset state required by the PCI spec) and with MSI-X
    /// disabled.
    ///
    /// `vm_socket` is the Tube used to request GSI allocation and MSI
    /// routing from the VM; `pci_id` and `device_name` identify this device
    /// in those requests.
    ///
    /// Panics if `msix_vectors` exceeds `MAX_MSIX_VECTORS_PER_DEVICE`.
    pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
        assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
        let mut table_entries: Vec<MsixTableEntry> = Vec::new();
        table_entries.resize_with(msix_vectors as usize, Default::default);
        // Every vector starts with its per-entry Mask bit set; the guest
        // must explicitly unmask a vector before messages can be sent.
        table_entries
            .iter_mut()
            .for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
        let mut pba_entries: Vec<u64> = Vec::new();
        // One pending bit per vector, 64 per PBA entry, rounded up.
        let num_pba_entries: usize =
            ((msix_vectors as usize) + BITS_PER_PBA_ENTRY - 1) / BITS_PER_PBA_ENTRY;
        pba_entries.resize_with(num_pba_entries, Default::default);
        let mut irq_vec = Vec::new();
        irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);
        MsixConfig {
            table_entries,
            pba_entries,
            irq_vec,
            masked: false,
            enabled: false,
            msi_device_socket: vm_socket,
            msix_num: msix_vectors,
            pci_id,
            device_name,
        }
    }
    /// Get the number of MSI-X vectors in this configuration.
    pub fn num_vectors(&self) -> u16 {
        self.msix_num
    }
    /// Check whether the Function Mask bit in Message Control word in set or not.
    /// if 1, all of the vectors associated with the function are masked,
    /// regardless of their per-vector Mask bit states.
    /// If 0, each vector's Mask bit determines whether the vector is masked or not.
    pub fn masked(&self) -> bool {
        self.masked
    }
    /// Check whether the Function Mask bit in MSIX table Message Control
    /// word in set or not.
    /// If true, the vector is masked.
    /// If false, the vector is unmasked.
    ///
    /// Out-of-range indices are reported as masked, which is the safe
    /// (no-message-sent) answer.
    pub fn table_masked(&self, index: usize) -> bool {
        if index >= self.table_entries.len() {
            true
        } else {
            self.table_entries[index].masked()
        }
    }
    /// Check whether the MSI-X Enable bit in Message Control word in set or not.
    /// if 1, the function is permitted to use MSI-X to request service.
    pub fn enabled(&self) -> bool {
        self.enabled
    }
    /// Read the MSI-X Capability Structure.
    /// The top 2 bits in Message Control word are emulated and all other
    /// bits are read only.
    ///
    /// `data` is the stored config-space DWORD containing Message Control
    /// in its upper 16 bits; the Enable/Function Mask bits are overwritten
    /// from this struct's emulated state before returning.
    pub fn read_msix_capability(&self, data: u32) -> u32 {
        let mut msg_ctl = (data >> 16) as u16;
        // Drop the stored Enable/Mask bits and substitute the live state.
        msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);
        if self.enabled {
            msg_ctl |= MSIX_ENABLE_BIT;
        }
        if self.masked {
            msg_ctl |= FUNCTION_MASK_BIT;
        }
        (msg_ctl as u32) << 16 | (data & u16::max_value() as u32)
    }
    /// Write to the MSI-X Capability Structure.
    /// Only the top 2 bits in Message Control Word are writable
    /// (offset 2 within the capability, 2-byte write).
    pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        if offset == 2 && data.len() == 2 {
            let reg = u16::from_le_bytes([data[0], data[1]]);
            let old_masked = self.masked;
            let old_enabled = self.enabled;
            self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
            self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;
            // On a 0 -> 1 Enable transition, allocate irqfds/GSIs and
            // program routes for every currently-unmasked vector. On
            // failure, report the function as still disabled.
            if !old_enabled && self.enabled {
                if let Err(e) = self.msix_enable_all() {
                    error!("failed to enable MSI-X: {}", e);
                    self.enabled = false;
                }
            }
            // If the Function Mask bit was set, and has just been cleared, it's
            // important to go through the entire PBA to check if there was any
            // pending MSI-X message to inject, given that the vector is not
            // masked.
            if old_masked && !self.masked {
                // NOTE(review): clones the whole table so the immutable
                // iteration doesn't conflict with the mutable calls below;
                // collecting the pending indices first would avoid the copy.
                for (index, entry) in self.table_entries.clone().iter().enumerate() {
                    if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                        self.inject_msix_and_clear_pba(index);
                    }
                }
                return MsixStatus::Changed;
            } else if !old_masked && self.masked {
                return MsixStatus::Changed;
            }
        } else {
            error!(
                "invalid write to MSI-X Capability Structure offset {:x}",
                offset
            );
        }
        MsixStatus::NothingToDo
    }
    // Reads the vector's current address/data from the MSI-X table and asks
    // the VM to route `gsi` to that MSI. An all-zero address is skipped
    // (presumably an entry the guest has not programmed yet — the route
    // will be added once the entry is written).
    fn add_msi_route(&self, index: u16, gsi: u32) -> MsixResult<()> {
        let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
        // index * 16 == index * MSIX_TABLE_ENTRIES_MODULO: byte offset of
        // the entry; the first QWORD is the message address.
        self.read_msix_table((index * 16).into(), data.as_mut());
        let msi_address: u64 = u64::from_le_bytes(data);
        let mut data: [u8; 4] = [0, 0, 0, 0];
        // Offset 8 within the entry is the message data DWORD.
        self.read_msix_table((index * 16 + 8).into(), data.as_mut());
        let msi_data: u32 = u32::from_le_bytes(data);
        if msi_address == 0 {
            return Ok(());
        }
        self.msi_device_socket
            .send(&VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
            })
            .map_err(MsixError::AddMsiRouteSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AddMsiRouteRecv)?
        {
            return Err(MsixError::AddMsiRoute(e));
        }
        Ok(())
    }
    // Enable MSI-X: set up every vector that is currently eligible.
    fn msix_enable_all(&mut self) -> MsixResult<()> {
        for index in 0..self.irq_vec.len() {
            self.msix_enable_one(index)?;
        }
        Ok(())
    }
    // Use a new MSI-X vector
    // Create a new eventfd and bind them to a new msi
    //
    // No-op if the vector is already set up, or if it is currently not
    // allowed to fire (function disabled, function-masked, or per-vector
    // masked) — in those cases setup happens later when state changes.
    fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
        if self.irq_vec[index].is_some()
            || !self.enabled()
            || self.masked()
            || self.table_masked(index)
        {
            return Ok(());
        }
        let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
        let request = VmIrqRequest::AllocateOneMsi {
            irqfd,
            device_id: self.pci_id,
            queue_id: index as usize,
            device_name: self.device_name.clone(),
        };
        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::AllocateOneMsiSend)?;
        let irq_num: u32 = match self
            .msi_device_socket
            .recv()
            .map_err(MsixError::AllocateOneMsiRecv)?
        {
            VmIrqResponse::AllocateOneMsi { gsi } => gsi,
            VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
            _ => unreachable!(),
        };
        // Reclaim the irqfd that was moved into the request so we can keep
        // it alongside the allocated GSI.
        self.irq_vec[index] = Some(IrqfdGsi {
            irqfd: match request {
                VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
                _ => unreachable!(),
            },
            gsi: irq_num,
        });
        self.add_msi_route(index as u16, irq_num)?;
        Ok(())
    }
    /// Read MSI-X table
    /// # Arguments
    /// * 'offset' - the offset within the MSI-X Table
    /// * 'data' - used to store the read results
    ///
    /// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full
    /// DWORD or aligned full QWORD transactions; otherwise, the result is undefined.
    ///
    /// location: DWORD3 DWORD2 DWORD1 DWORD0
    /// entry 0: Vector Control Msg Data Msg Upper Addr Msg Addr
    /// entry 1: Vector Control Msg Data Msg Upper Addr Msg Addr
    /// entry 2: Vector Control Msg Data Msg Upper Addr Msg Addr
    /// ...
    pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
        // Split the offset into entry index and DWORD offset within it.
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };
                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                // QWORD reads pair adjacent DWORDs (little-endian).
                let value = match modulo_offset {
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };
                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        };
    }
    /// Write to MSI-X table
    ///
    /// Message Address: the contents of this field specifies the address
    /// for the memory write transaction; different MSI-X vectors have
    /// different Message Address values
    /// Message Data: the contents of this field specifies the data driven
    /// on AD[31::00] during the memory write transaction's data phase.
    /// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
    /// is set, the function is prohibited from sending a message using
    /// this MSI-X Table entry.
    pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;
        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();
        match data.len() {
            4 => {
                let value = u32::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => self.table_entries[index].vector_ctl = value,
                    _ => error!("invalid offset"),
                };
            }
            8 => {
                let value = u64::from_le_bytes(data.try_into().unwrap());
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    _ => error!("invalid offset"),
                };
            }
            _ => error!("invalid data length"),
        };
        let new_entry = self.table_entries[index].clone();
        // This MSI-X vector is enabled for the first time.
        // If the irqfd/GSI allocation fails, re-mask the entry so the
        // vector does not appear usable.
        if self.enabled()
            && !self.masked()
            && self.irq_vec[index].is_none()
            && old_entry.masked()
            && !new_entry.masked()
        {
            if let Err(e) = self.msix_enable_one(index) {
                error!("failed to enable MSI-X vector {}: {}", index, e);
                self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
            }
            return MsixStatus::EntryChanged(index);
        }
        // The guest reprogrammed the address/data of an already-routed
        // vector: update the VM's MSI route to match.
        if self.enabled()
            && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
                || old_entry.msg_addr_hi != new_entry.msg_addr_hi
                || old_entry.msg_data != new_entry.msg_data)
        {
            if let Some(irqfd_gsi) = &self.irq_vec[index] {
                let irq_num = irqfd_gsi.gsi;
                if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                    error!("add_msi_route failed: {}", e);
                }
            }
        }
        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.
        // Check if bit has been flipped
        if !self.masked() {
            if old_entry.masked() && !self.table_entries[index].masked() {
                if self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
                return MsixStatus::EntryChanged(index);
            } else if !old_entry.masked() && self.table_entries[index].masked() {
                return MsixStatus::EntryChanged(index);
            }
        }
        MsixStatus::NothingToDo
    }
    /// Read PBA Entries
    /// # Arguments
    /// * 'offset' - the offset within the PBA entries
    /// * 'data' - used to store the read results
    ///
    /// Pending Bits[63::00]: For each Pending Bit that is set, the function
    /// has a pending message for the associated MSI-X Table entry.
    pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;
        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };
                data.copy_from_slice(&value.to_le_bytes());
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    _ => {
                        error!("invalid offset");
                        0
                    }
                };
                data.copy_from_slice(&value.to_le_bytes());
            }
            _ => error!("invalid data length"),
        }
    }
    /// Write to PBA Entries
    ///
    /// Software should never write, and should only read Pending Bits.
    /// If software writes to Pending Bits, the result is undefined.
    pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }
    // Sets or clears the pending bit for `vector` in the PBA.
    fn set_pba_bit(&mut self, vector: u16, set: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = (1 << shift) as u64;
        if set {
            self.pba_entries[index] |= mask;
        } else {
            mask = !mask;
            self.pba_entries[index] &= mask;
        }
    }
    // Returns 1 if `vector`'s pending bit is set, 0 otherwise.
    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }
    // Fires the irqfd for `vector` (if one has been allocated) and clears
    // its pending bit, per the unmask-with-pending-bit rule in the spec.
    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        if let Some(irq) = &self.irq_vec[vector] {
            irq.irqfd.write(1).unwrap();
        }
        // Clear the bit from PBA
        self.set_pba_bit(vector as u16, false);
    }
    /// Inject virtual interrupt to the guest
    ///
    /// # Arguments
    /// * 'vector' - the index to the MSI-X Table entry
    ///
    /// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
    /// prohibited from sending the associated message, and the function
    /// must set the associated Pending bit whenever the function would
    /// otherwise send the message. When software unmasks a vector whose
    /// associated Pending bit is set, the function must schedule sending
    /// the associated message, and clear the Pending bit as soon as the
    /// message has been sent.
    ///
    /// If the vector is unmasked, writing to irqfd which wakes up KVM to
    /// inject virtual interrupt to the guest.
    pub fn trigger(&mut self, vector: u16) {
        if self.table_entries[vector as usize].masked() || self.masked() {
            self.set_pba_bit(vector, true);
        } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
            irq.irqfd.write(1).unwrap();
        }
    }
    /// Return the raw descriptor of the MSI device socket
    pub fn get_msi_socket(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
    /// Return irqfd of MSI-X Table entry
    ///
    /// # Arguments
    /// * 'vector' - the index to the MSI-X table entry
    pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
        match self.irq_vec.get(vector as usize).unwrap_or(&None) {
            Some(irq) => Some(&irq.irqfd),
            None => None,
        }
    }
    /// Releases every allocated GSI/irqfd back to the VM. Send failures are
    /// ignored on a best-effort basis; the response (if any) is drained so
    /// the Tube stays in sync.
    pub fn destroy(&mut self) {
        while let Some(irq) = self.irq_vec.pop() {
            if let Some(irq) = irq {
                let request = VmIrqRequest::ReleaseOneIrq {
                    gsi: irq.gsi,
                    irqfd: irq.irqfd,
                };
                if self.msi_device_socket.send(&request).is_err() {
                    continue;
                }
                let _ = self.msi_device_socket.recv::<VmIrqResponse>();
            }
        }
    }
}
impl AsRawDescriptor for MsixConfig {
    /// Exposes the raw descriptor of the MSI device socket, identical to
    /// [`MsixConfig::get_msi_socket`].
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.get_msi_socket()
    }
}
/// Message Control Register
// 10-0: MSI-X Table size
// 13-11: Reserved
// 14: Mask. Mask all MSI-X when set.
// 15: Enable. Enable all MSI-X when set.
// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
#[bitfield]
#[derive(Copy, Clone, Default)]
pub struct MsixCtrl {
table_size: B10,
reserved: B4,
mask: B1,
enable: B1,
}
// SAFETY: It is safe to implement DataInit; all members are simple numbers
// and any value is valid, so MsixCap may be constructed from arbitrary bytes.
unsafe impl DataInit for MsixCap {}
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default)]
/// MSI-X Capability Structure
///
/// `#[repr(C)]` and the field order mirror the config-space layout so the
/// struct can be copied into PCI config space byte-for-byte (see the
/// `DataInit` impl above).
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    // 2-0: Table BAR indicator (BIR). Can be 0 to 5.
    // 31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    // 2-0: PBA BAR indicator (BIR). Can be 0 to 5.
    // 31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}
impl PciCapability for MsixCap {
    // Raw bytes of the capability as laid out in config space.
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }
    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::Msix
    }
    fn writable_bits(&self) -> Vec<u32> {
        // Only msg_ctl[15:14] is writable
        // NOTE(review): msg_ctl sits in bits 31:16 of the first DWORD, so a
        // per-bit mask for msg_ctl[15:14] would be 0xc000_0000; 0x3000_0000
        // covers bits 29:28 (msg_ctl[13:12], reserved). Confirm the intended
        // writable-bits semantics against PciConfiguration before changing —
        // the emulated Enable/Mask state is handled by MsixConfig's
        // read/write_msix_capability regardless of this stored mask.
        vec![0x3000_0000, 0, 0]
    }
}
impl MsixCap {
    /// Builds an MSI-X capability describing where the table and PBA live.
    ///
    /// `table_size` is the number of vectors (stored N - 1 encoded, as the
    /// spec requires); `table_off`/`pba_off` are offsets within the BARs
    /// selected by `table_pci_bar`/`pba_pci_bar` (the BIR, low 3 bits of
    /// the respective register).
    ///
    /// Panics if `table_size` is not below `MAX_MSIX_VECTORS_PER_DEVICE`.
    // NOTE(review): a `table_size` of 0 underflows the N - 1 encoding, and
    // the bound here is strict (`<`) while MsixConfig::new accepts `<=` —
    // confirm callers never pass 0 or the maximum.
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
        // Message Control: N - 1 encoded table size, MSI-X enabled.
        let mut msg_ctl = MsixCtrl::new();
        msg_ctl.set_table_size(table_size - 1);
        msg_ctl.set_enable(1);
        // Low 3 bits of each register carry the BIR; the remaining bits are
        // the 8-byte-aligned offset within that BAR.
        let table = (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8);
        let pba = (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8);
        MsixCap {
            _cap_vndr: 0,
            _cap_next: 0,
            msg_ctl,
            table,
            pba,
        }
    }
    /// Returns a copy of the Message Control register.
    #[cfg(unix)]
    pub fn msg_ctl(&self) -> MsixCtrl {
        self.msg_ctl
    }
}