diff --git a/lib/propolis/src/hw/virtio/bits.rs b/lib/propolis/src/hw/virtio/bits.rs index c94313f55..a7372886a 100644 --- a/lib/propolis/src/hw/virtio/bits.rs +++ b/lib/propolis/src/hw/virtio/bits.rs @@ -21,6 +21,8 @@ pub const VIRTIO_NET_F_STATUS: u64 = 1 << 16; pub const VIRTIO_NET_F_CTRL_VQ: u64 = 1 << 17; pub const VIRTIO_NET_F_CTRL_RX: u64 = 1 << 18; pub const VIRTIO_NET_F_CTRL_VLAN: u64 = 1 << 19; +pub const VIRTIO_NET_F_CTRL_RX_EXTRA: u64 = 1 << 20; +pub const VIRTIO_NET_F_GUEST_ANNOUNCE: u64 = 1 << 21; pub const VIRTIO_NET_F_MQ: u64 = 1 << 22; // virtio-block feature bits diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs index e67b838e5..321ce5207 100644 --- a/lib/propolis/src/hw/virtio/queue.rs +++ b/lib/propolis/src/hw/virtio/queue.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use std::io::Error as IoError; use std::mem; use std::num::{NonZeroU16, Wrapping}; use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering}; @@ -192,10 +193,11 @@ impl VqUsed { mem.write(self.gpa_flags, &value); } - /// Disables notifications on this queue; returns the previous state. + /// Disables notifications on this queue; returns whether notfications were + /// enabled before. fn disable_notify(&self, mem: &MemCtx) -> bool { let flags = self.flags(mem); - let current = flags.contains(UsedFlags::NO_NOTIFY); + let current = !flags.contains(UsedFlags::NO_NOTIFY); self.set_flags(flags | UsedFlags::NO_NOTIFY, mem); current } @@ -326,6 +328,10 @@ impl VirtQueue { used.reset(); self.live.store(false, Ordering::Release); self.enabled.store(false, Ordering::Release); + + // We don't modify `self.is_control` here. This allows devices to avoid, + // e.g., attempting to plumb a later `VqChange::Reset` down to the + // kernel in the case of viona. } pub(super) fn enable(&self) { @@ -348,8 +354,8 @@ impl VirtQueue { self.is_control.load(Ordering::Acquire) } - pub(super) fn set_control(&self) { - self.is_control.store(true, Ordering::Release); + fn set_control(&self, val: bool) { + self.is_control.store(val, Ordering::Release); } #[inline(always)] @@ -564,7 +570,9 @@ impl VirtQueue { used.interrupt.as_ref().map(|x| x.read()) } - /// Disables interrupts (notifications) on the `Used` ring + /// Disables interrupts (notifications) on the `Used` ring. + /// + /// Returns `true` if notifications were previously enabled. pub(super) fn disable_intr(&self, mem: &MemCtx) -> bool { let used = self.used.lock().unwrap(); used.disable_notify(mem) @@ -786,6 +794,66 @@ impl Chain { }); total == item_sz } + + pub fn read_many_owned( + &mut self, + mem: &MemCtx, + n_elements: usize, + ) -> Result, IoError> { + let required_sz = mem::size_of::() * n_elements; + if (self.read_stat.bytes_remain as usize) < required_sz { + return Err(IoError::new( + std::io::ErrorKind::InvalidData, + "Buffer too small", + )); + } + + let mut vec = Vec::with_capacity(n_elements); + + if n_elements == 0 { + return Ok(vec); + } + + // SAFETY: a `u8` has no alignment requirement so it is safe to + // construct a byte slice from another Vec. These bytes will not be read + // before initialisation. + let raw = unsafe { + std::slice::from_raw_parts_mut( + vec.spare_capacity_mut().as_ptr() as *mut u8, + required_sz, + ) + }; + + let mut done = 0; + let total = self.for_remaining_type(true, |addr, len| { + let mut remain = GuestData::from(&mut raw[done..]); + if let Some(copied) = mem.read_into(addr, &mut remain, len) { + let need_more = copied != remain.len(); + + done += copied; + (copied, need_more) + } else { + // Copy failed, so do not attempt anything else + (0, false) + } + }); + + if total == required_sz { + // SAFETY: read_many() was successful and just initialized all + // `n_elements` of the vector. + unsafe { + vec.set_len(n_elements); + } + + Ok(vec) + } else { + Err(IoError::new( + std::io::ErrorKind::UnexpectedEof, + "Insufficient bytes to complete read", + )) + } + } + /// Fetch a string of readable guest regions from the chain, provided there /// are enough to cover a specified length. pub fn readable_bufs(&mut self, len: usize) -> Option> { @@ -808,6 +876,7 @@ impl Chain { assert_eq!(remain, 0); Some(bufs) } + pub fn write( &mut self, item: &T, @@ -1030,6 +1099,17 @@ impl VirtQueues { Ok(()) } + pub fn set_ctl_queues(&self, indices: &[u16]) -> Result<(), ()> { + for vq in &self.queues { + vq.set_control(false); + } + for i in indices { + let vq = self.queues.get(usize::from(*i)).ok_or(())?; + vq.set_control(true); + } + Ok(()) + } + pub fn count(&self) -> NonZeroU16 { NonZeroU16::try_from(self.len() as u16) .expect("queue count already validated") @@ -1055,25 +1135,19 @@ impl VirtQueues { pub fn get(&self, qid: u16) -> Option<&Arc> { let len = self.len(); let qid = usize::from(qid); - // XXX: This special case is for the virtio network device, which always - // puts the control queue at the end of queue vector (see VirtIO 1.2 - // section 5.1.2). None of the other devices currently handle queues - // specially in this way, but we should come up with some better - // mechanism here. - if qid + 1 == len { - Some(self.get_control()) - } else { - self.queues[..len].get(qid) - } - } - fn get_control(&self) -> &Arc { - &self.queues[self.max_capacity() - 1] + // Control queues may be placed almost arbitrarily depending on the + // device type -- they may be mixed in with other queues, or placed at + // the very end after preallocated but unused queues. + self.queues.get(qid).filter(|v| v.is_control() || qid < len) } pub fn iter(&self) -> impl std::iter::Iterator> { - let len = self.len() - 1; - self.queues[..len].iter().chain([self.get_control()]) + let len = self.len(); + self.queues + .iter() + .enumerate() + .filter_map(move |(i, v)| (i < len || v.is_control()).then_some(v)) } /// Iterate all queues the device may have used; the current number of @@ -1081,8 +1155,11 @@ impl VirtQueues { /// like device reset and teardown we must manage all viona rings /// corresponding to ever-active VirtQueues. pub fn iter_all(&self) -> impl std::iter::Iterator> { - let peak = self.peak() - 1; - self.queues[..peak].iter().chain([self.get_control()]) + let peak = self.peak(); + self.queues + .iter() + .enumerate() + .filter_map(move |(i, v)| (i < peak || v.is_control()).then_some(v)) } pub fn export(&self) -> migrate::VirtQueuesV1 { diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 9a7f9bce4..2767f95e2 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -28,10 +28,12 @@ use super::{VirtioDevice, VqChange, VqIntr}; use bit_field::BitField; use lazy_static::lazy_static; +use serde::{Deserialize, Serialize}; use tokio::io::unix::AsyncFd; use tokio::io::Interest; use tokio::sync::watch; use tokio::task::JoinHandle; +use zerocopy::{FromBytes, Immutable, IntoBytes}; // Re-export API versioning interface for convenience of propolis consumers pub use viona_api::{api_version, ApiVersion}; @@ -49,6 +51,13 @@ pub const fn max_num_queues() -> usize { PROPOLIS_MAX_MQ_PAIRS as usize * 2 } +/// The index of the control queue when multiqueue ([`VIRTIO_NET_F_MQ`]) has +/// not been negotiated. +/// +/// In this case, the driver will behave as though we have allocated only one +/// Rx/Tx queue pair, followed by the control queue. +pub const VIRTIO_NO_MQ_CTRL_Q_INDEX: usize = 2; + const ETHERADDRL: usize = 6; /// The caller of `set_use_pairs` will probably be inlined into a larger @@ -65,15 +74,16 @@ enum MqSetPairsCause { #[usdt::provider(provider = "propolis")] mod probes { fn virtio_viona_mq_set_use_pairs(cause: u8, npairs: u16) {} + fn virtio_viona_cq_request(class: u8, command: u8) {} } /// Types and so forth for supporting the control queue. /// Note that these come from the VirtIO spec, section /// 5.1.6.2 in VirtIO 1.2. pub mod control { - use super::ETHERADDRL; + use super::MacAddr; use std::convert::TryFrom; - use zerocopy::FromBytes; + use zerocopy::{FromBytes, IntoBytes}; /// The control message header has two data: a u8 representing the "class" /// of control message, which describes what the message applies to, and a @@ -83,8 +93,8 @@ pub mod control { #[derive(Clone, Copy, Debug, Default, FromBytes)] #[repr(C)] pub struct Header { - class: u8, - command: u8, + pub class: u8, + pub command: u8, } #[derive(Clone, Copy, Debug)] @@ -112,7 +122,8 @@ pub mod control { } } - #[derive(Clone, Copy, Debug)] + #[derive(Clone, Copy, Debug, IntoBytes)] + #[repr(u8)] pub enum Ack { Ok = 0, Err = 1, @@ -129,6 +140,15 @@ pub mod control { NoBroadcast = 5, } + /// The payload accompanying an [`RxCmd`]. + #[derive(Clone, Copy, Debug, Default, FromBytes)] + #[repr(C)] + pub struct Rx { + /// Whether the flag signalled by the command should be enabled (`1`) + /// or disabled (`0`). + pub set: u8, + } + #[derive(Clone, Copy, Debug, strum::FromRepr)] #[repr(u8)] pub enum MacCmd { @@ -136,13 +156,6 @@ pub mod control { AddrSet = 1, } - #[derive(Clone, Copy, Debug, Default)] - #[repr(C)] - pub struct Mac { - entries: u32, - mac: [u8; ETHERADDRL], - } - #[derive(Clone, Copy, Debug, Default, FromBytes)] #[repr(C)] pub struct Mq { @@ -178,13 +191,51 @@ pub mod control { pub enum AnnounceCmd { Ack = 0, } + + /// Read a MAC filter table from a control queue. + /// + /// These are encoded as a `u32` followed by that number of + /// 6-byte MAC addresses. + pub(super) fn read_mac_list( + chain: &mut super::Chain, + mem: &super::MemCtx, + ) -> Result, ()> { + let mut entry_count = 0u32; + if !chain.read(&mut entry_count, mem) { + return Err(()); + } + + chain + .read_many_owned(mem, entry_count as usize) + .map_err(|_| ()) + .map(Vec::into_boxed_slice) + } +} + +pub mod migrate { + use super::*; + use crate::migrate::*; + + #[derive(Deserialize, Serialize)] + pub struct VionaStateV1 { + pub promisc: PromiscLevel, + pub filter: u8, + pub unicast_mac_filters: Vec, + pub multicast_mac_filters: Vec, + } + + impl Schema<'_> for VionaStateV1 { + fn id() -> SchemaId { + ("pci-virtio-viona", 1) + } + } } /// Viona's in-kernel emulation of the device VirtQueues is performed in what /// are calls "vrings". Since the userspace portion of the Viona emulation is /// tasked with keeping the vring state in sync with the VirtQueue it /// represents, we must track its perceived state. -#[derive(Copy, Clone, Default, Eq, PartialEq)] +#[derive(Copy, Clone, Default, Eq, PartialEq, Debug)] enum VRingState { /// Initial state of the vring as it comes out of reset /// @@ -219,19 +270,86 @@ enum VRingState { Fatal, } +bitflags! { + /// Packet receive filters requested by a virtio NIC driver. + /// + /// Each filter requested by the guest is a discrete value of + /// [`control::RxCmd`], configured with a binary state (on/off). The device + /// is responsible for determining which traffic should be allowed and + /// filtered based on how these flags take priority over one another. We + /// represent this as a bitfield for simplicity. + #[derive(Copy, Clone, Debug)] + struct FilterState: u8 { + /// The driver has requested that we enter promiscuous mode, and filter + /// no packets based on MAC address. This supersedes all other active + /// flags. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX`]. + const PROMISCUOUS = 1 << 0; + /// The driver has requested that we allow it to receive all multicast + /// packets, regardless of filter table state. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX`]. + const ALL_MULTICAST = 1 << 1; + /// The driver has requested that we allow it to receive all unicast + /// packets, regardless of filter table state. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const ALL_UNICAST = 1 << 2; + /// The driver has requested that we drop all multicast packets, except + /// from broadcast frames. This supersedes [`Self::ALL_MULTICAST`]. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const NO_MULTICAST = 1 << 3; + /// The driver has requested that we drop all unicast packets, except + /// from broadcast frames. This supersedes [`Self::ALL_UNICAST`]. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const NO_UNICAST = 1 << 4; + /// The driver has requested that we drop all broadcast packets. + /// This supersedes [`Self::ALL_MULTICAST`]. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const NO_BROADCAST = 1 << 5; + + /// All commands exposed by [`VIRTIO_NET_F_CTRL_RX`]. + const RX_CMDS = Self::PROMISCUOUS.bits() | Self::ALL_MULTICAST.bits(); + + /// All commands exposed by [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const RX_EXTRA_CMDS = Self::ALL_UNICAST.bits() + | Self::NO_MULTICAST.bits() + | Self::NO_UNICAST.bits() + | Self::NO_BROADCAST.bits(); + } +} + struct Inner { poller: Option, iop_state: Option, notify_mmio_addr: Option, vring_state: Vec, + + promisc: PromiscLevel, + filter: FilterState, + unicast_mac_filters: Box<[MacAddr]>, + multicast_mac_filters: Box<[MacAddr]>, } impl Inner { - fn new(max_queues: usize) -> Self { + fn new(max_queues: usize, promisc: PromiscLevel) -> Self { let vring_state = vec![Default::default(); max_queues]; let poller = None; let iop_state = None; let notify_mmio_addr = None; - Self { poller, iop_state, notify_mmio_addr, vring_state } + Self { + poller, + iop_state, + notify_mmio_addr, + vring_state, + promisc, + filter: FilterState::empty(), + unicast_mac_filters: Box::new([]), + multicast_mac_filters: Box::new([]), + } } /// Get the `VRingState` for a given VirtQueue @@ -299,6 +417,77 @@ impl Default for DeviceParams { } } +/// Relaxed levels of packet filtering offered by viona. +#[derive( + Copy, Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, +)] +pub enum PromiscLevel { + /// The device should receive only packets for its installed MAC + /// filters. + /// + /// Today this allows solely [`PciVirtioViona::mac_addr`]. + #[default] + None, + /// The device should receive all multicast traffic in addition + /// to its registered unicast filters. + AllMulti, + /// The device should receive all packets. + All, + #[cfg(feature = "falcon")] + /// The device should receive all packets, including VLAN tags. + /// + /// This suggests a bug in viona: when `VIRTIO_NET_F_CTRL_VLAN` is + /// not negotiated, the device _should_ accept all VLAN-tagged frames. + /// + /// This mechanism is only present on the branch + /// oxidecomputer/illumos-gate/viona_vlans. If the OS supports this, + /// then we must remain in this mode if set to ensure hosts receive + /// the correct traffic. + AllVlan, +} + +impl From for usize { + fn from(value: PromiscLevel) -> Self { + (match value { + PromiscLevel::None => viona_api::VIONA_PROMISC_NONE, + PromiscLevel::AllMulti => viona_api::VIONA_PROMISC_MULTI, + PromiscLevel::All => viona_api::VIONA_PROMISC_ALL, + #[cfg(feature = "falcon")] + PromiscLevel::AllVlan => viona_api::VIONA_PROMISC_ALL_VLAN, + }) as usize + } +} + +/// A MAC address. +#[derive( + Copy, + Clone, + Debug, + Default, + Eq, + PartialEq, + Ord, + PartialOrd, + FromBytes, + IntoBytes, + Immutable, + Serialize, + Deserialize, +)] +pub struct MacAddr([u8; ETHERADDRL]); + +impl From<[u8; ETHERADDRL]> for MacAddr { + fn from(value: [u8; ETHERADDRL]) -> Self { + Self(value) + } +} + +impl MacAddr { + pub const fn is_unicast(&self) -> bool { + (self.0[0] & 0b1) == 0 + } +} + /// Represents a connection to the kernel's Viona (VirtIO Network Adapter) /// driver. pub struct PciVirtioViona { @@ -307,7 +496,7 @@ pub struct PciVirtioViona { indicator: lifecycle::Indicator, dev_features: u64, - mac_addr: [u8; ETHERADDRL], + mac_addr: MacAddr, mtu: Option, hdl: VionaHdl, inner: Mutex, @@ -341,13 +530,23 @@ impl PciVirtioViona { let info = dlhdl.query_link(vnic_name)?; let hdl = VionaHdl::new(info.link_id, vm.fd())?; + // Viona is configured in all-multicast mode by default. We'll downgrade + // this later if the guest supports `VIRTIO_NET_F_CTRL_RX`. + #[cfg(not(feature = "falcon"))] + let promisc_level = PromiscLevel::AllMulti; #[cfg(feature = "falcon")] - if let Err(e) = hdl.set_promisc(viona_api::VIONA_PROMISC_ALL_VLAN) { - // Until/unless this support is integrated into stlouis/illumos, - // this is an expected failure. This is needed to use vlans, - // but shouldn't affect any other use case. - eprintln!("failed to enable promisc mode on {vnic_name}: {e:?}"); - } + let promisc_level = match hdl.set_promisc(PromiscLevel::AllVlan) { + Ok(()) => PromiscLevel::AllVlan, + Err(e) => { + // Until/unless this support is integrated into stlouis/illumos, + // this is an expected failure. This is needed to use vlans, + // but shouldn't affect any other use case. + eprintln!( + "failed to enable promisc mode on {vnic_name}: {e:?}" + ); + PromiscLevel::AllMulti + } + }; if let Some(vp) = viona_params { vp.set(&hdl)?; @@ -373,13 +572,15 @@ impl PciVirtioViona { .chain([ctl_queue_size]) .collect::>(); // The vector is sized with the maximum number of rings/queues, but - // until the driver negotiates multiqueue, we only use the first two. - let queues = VirtQueues::new_with_len(3, &queue_sizes); - if let Some(ctlq) = queues.get(2) { - ctlq.set_control(); - } + // until the driver negotiates multiqueue, we only use the first two + // for the datapath. `queue_sizes` always contains at least three + // elements -- the third will serve as the control queue if + // multiqueue is not negotiated, even if it is a little large for that + // purpose. + let queues = VirtQueues::new_with_len(2, &queue_sizes); let nqueues = queues.max_capacity(); hdl.set_pairs(1).unwrap(); + // Add one for config space. let msix_count = Some(1 + nqueues as u16); let (virtio_state, pci_state) = PciVirtioState::new( @@ -391,17 +592,16 @@ impl PciVirtioViona { ); let dev_features = hdl.get_avail_features()?; - let mut this = PciVirtioViona { + let this = PciVirtioViona { virtio_state, pci_state, indicator: Default::default(), dev_features, - mac_addr: [0; ETHERADDRL], + mac_addr: info.mac_addr.into(), mtu: info.mtu, hdl, - inner: Mutex::new(Inner::new(nqueues)), + inner: Mutex::new(Inner::new(nqueues, promisc_level)), }; - this.mac_addr.copy_from_slice(&info.mac_addr); let this = Arc::new(this); // Spawn the interrupt poller @@ -430,17 +630,13 @@ impl PciVirtioViona { } } - fn is_ctl_queue(&self, vq: &VirtQueue) -> bool { - usize::from(vq.id) + 1 == self.virtio_state.queues.len() - } - fn ctl_queue_notify(&self, vq: &VirtQueue) { if let Some(mem) = self.pci_state.acc_mem.access() { while !vq.avail_is_empty(&mem) { let mut chain = Chain::with_capacity(4); let intrs_en = vq.disable_intr(&mem); while let Some((_idx, _len)) = vq.pop_avail(&mut chain, &mem) { - let res = match self.ctl_msg(vq, &mut chain, &mem) { + let res = match self.ctl_msg(&mut chain, &mem) { Ok(_) => control::Ack::Ok, Err(_) => control::Ack::Err, } as u8; @@ -454,46 +650,70 @@ impl PciVirtioViona { } } - fn ctl_msg( - &self, - vq: &VirtQueue, - chain: &mut Chain, - mem: &MemCtx, - ) -> Result<(), ()> { + fn ctl_msg(&self, chain: &mut Chain, mem: &MemCtx) -> Result<(), ()> { let mut header = control::Header::default(); if !chain.read(&mut header, &mem) { return Err(()); } + probes::virtio_viona_cq_request!(|| (header.class, header.command)); + use control::Command; match Command::try_from(header).map_err(|_| ())? { - Command::Rx(cmd) => self.ctl_rx(cmd, vq, chain, mem), - Command::Mac(cmd) => self.ctl_mac(cmd, vq, chain, mem), - Command::Vlan(_) => Ok(()), - Command::Announce(_) => Ok(()), - Command::Mq(cmd) => self.ctl_mq(cmd, vq, chain, mem), + Command::Rx(cmd) => self.ctl_rx(cmd, chain, mem), + Command::Mac(cmd) => self.ctl_mac(cmd, chain, mem), + // We do not yet advertise `VIRTIO_NET_F_CTRL_VLAN`. + Command::Vlan(_) => Err(()), + // We do not yet advertise `VIRTIO_NET_F_GUEST_ANNOUNCE` + Command::Announce(_) => Err(()), + Command::Mq(cmd) => self.ctl_mq(cmd, chain, mem), } } fn ctl_rx( &self, cmd: control::RxCmd, - vq: &VirtQueue, chain: &mut Chain, mem: &MemCtx, ) -> Result<(), ()> { - let _todo = (cmd, vq, chain, mem); - Err(()) + use control::RxCmd; + let filter = match cmd { + RxCmd::Promisc => FilterState::PROMISCUOUS, + RxCmd::AllMulticast => FilterState::ALL_MULTICAST, + RxCmd::AllUnicast => FilterState::ALL_UNICAST, + RxCmd::NoMulticast => FilterState::NO_MULTICAST, + RxCmd::NoUnicast => FilterState::NO_UNICAST, + RxCmd::NoBroadcast => FilterState::NO_BROADCAST, + }; + + let mut msg = control::Rx::default(); + if !chain.read(&mut msg, &mem) { + return Err(()); + } + let active = match msg.set { + 0 => false, + 1 => true, + _ => return Err(()), + }; + self.set_filter_state(filter, active) } fn ctl_mac( &self, cmd: control::MacCmd, - vq: &VirtQueue, chain: &mut Chain, mem: &MemCtx, ) -> Result<(), ()> { - let _todo = (cmd, vq, chain, mem); - Err(()) + use control::MacCmd; + match cmd { + MacCmd::TableSet => { + let unicast = control::read_mac_list(chain, mem)?; + let multicast = control::read_mac_list(chain, mem)?; + + self.set_mac_filters(unicast, multicast) + } + // We do not advertise `VIRTIO_NET_F_CTRL_MAC_ADDR` + MacCmd::AddrSet => return Err(()), + } } fn set_use_pairs(&self, requested: u16) -> Result<(), ()> { @@ -505,28 +725,24 @@ impl PciVirtioViona { return Ok(()); } self.hdl.set_usepairs(requested).unwrap(); - self.virtio_state - .queues - .set_len(npairs * 2 + 1) - .expect("num queue pairs"); + self.virtio_state.queues.set_len(npairs * 2).expect("num queue pairs"); Ok(()) } fn ctl_mq( &self, cmd: control::MqCmd, - vq: &VirtQueue, chain: &mut Chain, mem: &MemCtx, ) -> Result<(), ()> { use control::MqCmd; - let _todo = vq; match cmd { MqCmd::SetPairs => { let mut msg = control::Mq::default(); if !chain.read(&mut msg, &mem) { return Err(()); } + let npairs = msg.npairs; probes::virtio_viona_mq_set_use_pairs!(|| ( MqSetPairsCause::Commanded as u8, @@ -541,7 +757,7 @@ impl PciVirtioViona { fn net_cfg_read(&self, id: &NetReg, ro: &mut ReadOp) { match id { - NetReg::Mac => ro.write_bytes(&self.mac_addr), + NetReg::Mac => ro.write_bytes(&self.mac_addr.0), NetReg::Status => { // Always report link up ro.write_u16(VIRTIO_NET_S_LINK_UP); @@ -713,13 +929,149 @@ impl PciVirtioViona { let _ = self.hdl.set_notify_mmio_addr(state.notify_mmio_addr); } } + + /// Set or unset unicast/multicast class-wide filters on behalf of a driver. + fn set_filter_state( + &self, + filter: FilterState, + active: bool, + ) -> Result<(), ()> { + if (self.virtio_state.negotiated_features() & VIRTIO_NET_F_CTRL_RX) == 0 + && filter.intersects(FilterState::RX_CMDS) + { + return Err(()); + } + + if filter.intersects(FilterState::RX_EXTRA_CMDS) { + // We cannot express any of the extra filters within viona yet. + return Err(()); + } + + let mut state = self.inner.lock().unwrap(); + let old_filter = state.filter; + state.filter.set(filter, active); + + match self.set_promisc(self.needed_promisc(&state), &mut state) { + Ok(()) => Ok(()), + Err(()) => { + state.filter = old_filter; + Err(()) + } + } + } + + /// Replace the requested set of explicit MAC address filters on a device + /// with a new table provided by the driver. + fn set_mac_filters( + &self, + mut unicast: Box<[MacAddr]>, + mut multicast: Box<[MacAddr]>, + ) -> Result<(), ()> { + if (self.virtio_state.negotiated_features() & VIRTIO_NET_F_CTRL_RX) == 0 + { + return Err(()); + } + + if unicast.iter().any(|v| !v.is_unicast()) + || multicast.iter().any(|v| v.is_unicast()) + { + return Err(()); + } + + let mut state = self.inner.lock().unwrap(); + + std::mem::swap(&mut unicast, &mut state.unicast_mac_filters); + std::mem::swap(&mut multicast, &mut state.multicast_mac_filters); + + match self.set_promisc(self.needed_promisc(&state), &mut state) { + Ok(()) => Ok(()), + Err(()) => { + state.unicast_mac_filters = unicast; + state.multicast_mac_filters = multicast; + Err(()) + } + } + } + + /// Update the promisc level of the device. + fn set_promisc( + &self, + level: PromiscLevel, + state: &mut Inner, + ) -> Result<(), ()> { + if level == state.promisc { + return Ok(()); + } + + match self.hdl.set_promisc(level) { + Ok(_) => { + state.promisc = level; + Ok(()) + } + Err(_) => { + // Ensure that we return to the old level of promisc. + if self.hdl.set_promisc(state.promisc).is_err() { + self.virtio_state.set_needs_reset(self); + } + Err(()) + } + } + } + + /// Compute whether the driver requires us to move into promiscuous mode + /// based on its MAC filters and explicit filter mode. + fn needed_promisc(&self, state: &Inner) -> PromiscLevel { + // The VLAN tag workaround, if requested, always wins and cannot + // be downgraded. + #[cfg(feature = "falcon")] + if state.promisc == PromiscLevel::AllVlan { + return PromiscLevel::AllVlan; + } + + // We don't yet have any mechanism to account for ALL_UNICAST or + // related filters from VIRTIO_NET_F_CTRL_RX_EXTRA, and the guest will + // not request them. + + // We don't have an ioctl yet for viona to explicitly install a set of + // filters. For now, we need to apply some level of promiscuous mode + // to give the guest what it asks for. + // + // Even though the guest can't yet use its parent feature, we can still + // account for NO_MULTICAST here. + let need_mcast = (state.filter.contains(FilterState::ALL_MULTICAST) + && !state.filter.contains(FilterState::NO_MULTICAST)) + || !state.multicast_mac_filters.is_empty(); + + // Don't inflict promiscuous mode on drivers which request only their + // own MAC address. + let filter_is_self = state + .unicast_mac_filters + .get(0) + .map(|mac| mac == &self.mac_addr) + .unwrap_or_default(); + let need_promisc = state.filter.contains(FilterState::PROMISCUOUS) + || !(filter_is_self || state.unicast_mac_filters.is_empty()); + + if need_promisc { + PromiscLevel::All + } else if need_mcast { + PromiscLevel::AllMulti + } else { + PromiscLevel::None + } + } } impl VirtioDevice for PciVirtioViona { fn rw_dev_config(&self, mut rwo: RWOp) { NET_DEV_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => self.net_cfg_read(id, ro), RWOp::Write(_) => { - //ignore writes + // Ignore writes. + // + // Technically while we are in either `Mode::Transitional` + // or `Mode::Legacy` the driver may write to `NetReg::Mac` + // in lieu of sending a `MacCmd::AddrSet`. We don't support + // changing the MAC address today. } }); } @@ -731,6 +1083,7 @@ impl VirtioDevice for PciVirtioViona { let mut feat = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ + | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_MQ; // We drop the "VIRTIO_NET_F_MTU" flag from feat if we are unable to // query it. This can happen when executing within a non-global Zone. @@ -746,19 +1099,42 @@ impl VirtioDevice for PciVirtioViona { fn set_features(&self, feat: u64) -> Result<(), ()> { self.hdl.set_features(feat).map_err(|_| ())?; - if (feat & VIRTIO_NET_F_MQ) != 0 { - self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).map_err(|_| ())?; - probes::virtio_viona_mq_set_use_pairs!(|| ( - MqSetPairsCause::MqEnabled as u8, - PROPOLIS_MAX_MQ_PAIRS - )); - self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?; - } - Ok(()) + + // Any remaining setup is for control-queue based features. + let control_queue = if (feat & VIRTIO_NET_F_CTRL_VQ) == 0 { + None + } else { + if self.virtio_state.queues.max_capacity() < 3 { + // Since we're advertising control queue support, we need + // one Rx, Tx, and CtlQ at the minimum. + return Err(()); + } + + let ctl_q_idx = if (feat & VIRTIO_NET_F_MQ) != 0 { + self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).map_err(|_| ())?; + probes::virtio_viona_mq_set_use_pairs!(|| ( + MqSetPairsCause::MqEnabled as u8, + PROPOLIS_MAX_MQ_PAIRS + )); + self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?; + self.virtio_state.queues.max_capacity() - 1 + } else { + VIRTIO_NO_MQ_CTRL_Q_INDEX + }; + + if (feat & VIRTIO_NET_F_CTRL_RX) != 0 { + let mut state = self.inner.lock().unwrap(); + self.set_promisc(PromiscLevel::None, &mut state)?; + } + + Some(ctl_q_idx.try_into().expect("queue index must be a valid u16")) + }; + + self.virtio_state.queues.set_ctl_queues(control_queue.as_slice()) } fn queue_notify(&self, vq: &VirtQueue) { - if self.is_ctl_queue(vq) { + if vq.is_control() { self.ctl_queue_notify(vq); return; } @@ -850,6 +1226,12 @@ impl Lifecycle for PciVirtioViona { self.set_use_pairs(1).expect("can set viona back to one queue pair"); self.hdl.set_pairs(1).expect("can set viona back to one queue pair"); self.virtio_state.queues.reset_peak(); + + let mut state = self.inner.lock().unwrap(); + state.unicast_mac_filters = Box::new([]); + state.multicast_mac_filters = Box::new([]); + self.set_promisc(PromiscLevel::AllMulti, &mut state) + .expect("can reset viona promiscuous state"); } fn start(&self) -> anyhow::Result<()> { self.run(); @@ -922,7 +1304,20 @@ impl MigrateMulti for PciVirtioViona { output: &mut PayloadOutputs, ctx: &MigrateCtx, ) -> Result<(), MigrateStateError> { - ::export(self, output, ctx) + ::export(self, output, ctx)?; + + let viona_state = { + let state = self.inner.lock().unwrap(); + + migrate::VionaStateV1 { + promisc: state.promisc, + filter: state.filter.bits(), + unicast_mac_filters: state.unicast_mac_filters.to_vec(), + multicast_mac_filters: state.multicast_mac_filters.to_vec(), + } + }; + + output.push(viona_state.into()) } fn import( @@ -943,7 +1338,7 @@ impl MigrateMulti for PciVirtioViona { self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).unwrap(); } // Queue count is a NonZeroU16; hence `get` and -1 will not underflow. - let io_queues = self.virtio_state.queues.count().get() - 1; + let io_queues = self.virtio_state.queues.count().get(); let pairs = io_queues / 2; if !io_queues.is_multiple_of(2) { return Err(MigrateStateError::ImportFailed(format!( @@ -960,7 +1355,24 @@ impl MigrateMulti for PciVirtioViona { )) })?; - Ok(()) + let input: migrate::VionaStateV1 = offer.take()?; + + let mut state = self.inner.lock().unwrap(); + state.filter = + FilterState::from_bits(input.filter).ok_or_else(|| { + MigrateStateError::ImportFailed(format!( + "unrecognised flags in filter state: {:x}", + input.filter & !FilterState::all().bits() + )) + })?; + state.unicast_mac_filters = input.unicast_mac_filters.into(); + state.multicast_mac_filters = input.multicast_mac_filters.into(); + self.set_promisc(input.promisc, &mut state).map_err(|_| { + MigrateStateError::ImportFailed(format!( + "Could not move device promisc level to {:?}.", + input.promisc + )) + }) } } @@ -1236,11 +1648,9 @@ impl VionaHdl { Ok(()) } - /// /// Set the desired promiscuity level on this interface. - #[cfg(feature = "falcon")] - fn set_promisc(&self, p: i32) -> io::Result<()> { - self.0.ioctl_usize(viona_api::VNA_IOC_SET_PROMISC, p as usize)?; + fn set_promisc(&self, p: PromiscLevel) -> io::Result<()> { + self.0.ioctl_usize(viona_api::VNA_IOC_SET_PROMISC, usize::from(p))?; Ok(()) } } @@ -1921,6 +2331,7 @@ mod test { self.common_config.write_le64(common_cfg::queue_device, used_gpa); + // TODO(ky): why are we failing on this step? self.common_config.write_le16(common_cfg::queue_enable, 1); // Finally, round up so the next queue (if there is one) starts @@ -2233,6 +2644,7 @@ mod test { Lifecycle::reset(test_ctx.dev.as_ref()); let mut driver = test_ctx.create_driver(); driver.modern_device_init(expected_feats); + Lifecycle::reset(test_ctx.dev.as_ref()); let mut driver = test_ctx.create_driver(); driver.modern_device_init(expected_feats | VIRTIO_NET_F_MQ); @@ -2429,17 +2841,24 @@ mod test { const TEST_VNIC: &'static str = "vnic_prop_test0"; for test in tests { let underlying_nic = underlying_nic.clone(); + eprintln!("running viona test '{}'", test.name); rt.block_on(async move { create_vnic(&underlying_nic, TEST_VNIC); - let test_ctx = - create_test_ctx(test.name, &underlying_nic, TEST_VNIC); - Lifecycle::start(test_ctx.dev.as_ref()) - .expect("can start viona device"); - let test_ctx = (test.test_fn)(test_ctx); - drop(test_ctx); - + let res = std::panic::catch_unwind(move || { + let test_ctx = + create_test_ctx(test.name, &underlying_nic, TEST_VNIC); + Lifecycle::start(test_ctx.dev.as_ref()) + .expect("can start viona device"); + let test_ctx = (test.test_fn)(test_ctx); + drop(test_ctx); + }); delete_vnic(TEST_VNIC); + if let Err(_) = res { + panic!("viona test '{}' was unsuccessful\n", test.name); + } else { + eprintln!("viona test '{}' was successful\n", test.name); + } }); } }