From 0357b54d67acaec4ef8afed7ae7ed5e415e9a02e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 24 Apr 2026 11:00:33 +0100 Subject: [PATCH 1/8] viona: support (enough) `VIRTIO_NET_F_CTRL_RX` to remove promisc Closes #1122. --- lib/propolis/src/hw/virtio/bits.rs | 1 + lib/propolis/src/hw/virtio/viona.rs | 337 ++++++++++++++++++++++++++-- 2 files changed, 316 insertions(+), 22 deletions(-) diff --git a/lib/propolis/src/hw/virtio/bits.rs b/lib/propolis/src/hw/virtio/bits.rs index c94313f55..b78d577f4 100644 --- a/lib/propolis/src/hw/virtio/bits.rs +++ b/lib/propolis/src/hw/virtio/bits.rs @@ -21,6 +21,7 @@ pub const VIRTIO_NET_F_STATUS: u64 = 1 << 16; pub const VIRTIO_NET_F_CTRL_VQ: u64 = 1 << 17; pub const VIRTIO_NET_F_CTRL_RX: u64 = 1 << 18; pub const VIRTIO_NET_F_CTRL_VLAN: u64 = 1 << 19; +pub const VIRTIO_NET_F_CTRL_RX_EXTRA: u64 = 1 << 20; pub const VIRTIO_NET_F_MQ: u64 = 1 << 22; // virtio-block feature bits diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 2368bad2a..bd0f534a5 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -127,6 +127,15 @@ pub mod control { NoBroadcast = 5, } + /// The payload accompanying an [`RxCmd`]. + #[derive(Clone, Copy, Debug, Default)] + #[repr(C)] + pub struct Rx { + /// Whether the flag signalled by the command should be enabled (`1`) + /// or disabled (`0`). + pub set: u8, + } + #[derive(Clone, Copy, Debug, strum::FromRepr)] #[repr(u8)] pub enum MacCmd { @@ -176,6 +185,34 @@ pub mod control { pub enum AnnounceCmd { Ack = 0, } + + /// Read a MAC filter table from a control queue. + /// + /// These are encoded as a `u32` followed by that number of + /// 6-byte MAC addresses. + pub(super) fn read_mac_list( + chain: &mut super::Chain, + mem: &super::MemCtx, + ) -> Result, ()> { + let mut entry_count = 0u32; + if !chain.read(&mut entry_count, mem) { + return Err(()); + } + + if entry_count == 0 { + return Ok(Box::new([])); + } + + let mut space = + vec![[0; ETHERADDRL]; entry_count as usize].into_boxed_slice(); + for i in 0..space.len() { + if !chain.read(&mut space[i], mem) { + return Err(()); + } + } + + Ok(space) + } } /// Viona's in-kernel emulation of the device VirtQueues is performed in what @@ -217,19 +254,79 @@ enum VRingState { Fatal, } +bitflags! { + /// Packet receive filters requested by a virtio NIC driver. + #[derive(Copy, Clone, Debug)] + struct FilterState: u8 { + /// The driver has requested that we enter promiscuous mode, and filter no + /// packets based on MAC address. This supersedes all other active flags. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX`]. + const PROMISCUOUS = 1 << 0; + /// The driver has requested that we allow it to receive all multicast + /// packets, regardless of filter table state. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX`]. + const ALL_MULTICAST = 1 << 1; + /// The driver has requested that we allow it to receive all unicast + /// packets, regardless of filter table state. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const ALL_UNICAST = 1 << 2; + /// The driver has requested that we drop all multicast packets, except from + /// broadcast frames. This supersedes [`Self::ALL_MULTICAST`]. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const NO_MULTICAST = 1 << 3; + /// The driver has requested that we drop all unicast packets, except from + /// broadcast frames. This supersedes [`Self::ALL_UNICAST`]. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const NO_UNICAST = 1 << 4; + /// The driver has requested that we drop all broadcast packets. + /// This supersedes [`Self::ALL_MULTICAST`]. + /// + /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const NO_BROADCAST = 1 << 5; + + /// All commands exposed by [`VIRTIO_NET_F_CTRL_RX`]. + const RX_CMDS = Self::PROMISCUOUS.bits() | Self::ALL_MULTICAST.bits(); + + /// All commands exposed by [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. + const RX_EXTRA_CMDS = Self::ALL_UNICAST.bits() + | Self::NO_MULTICAST.bits() + | Self::NO_UNICAST.bits() + | Self::NO_BROADCAST.bits(); + } +} + struct Inner { poller: Option, iop_state: Option, notify_mmio_addr: Option, vring_state: Vec, + + promisc: PromiscLevel, + filter: FilterState, + unicast_mac_filters: Box<[[u8; ETHERADDRL]]>, + multicast_mac_filters: Box<[[u8; ETHERADDRL]]>, } impl Inner { - fn new(max_queues: usize) -> Self { + fn new(max_queues: usize, promisc: PromiscLevel) -> Self { let vring_state = vec![Default::default(); max_queues]; let poller = None; let iop_state = None; let notify_mmio_addr = None; - Self { poller, iop_state, notify_mmio_addr, vring_state } + Self { + poller, + iop_state, + notify_mmio_addr, + vring_state, + promisc, + filter: FilterState::empty(), + unicast_mac_filters: Box::new([]), + multicast_mac_filters: Box::new([]), + } } /// Get the `VRingState` for a given VirtQueue @@ -297,6 +394,45 @@ impl Default for DeviceParams { } } +/// Relaxed levels of packet filtering offered by viona. +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +enum PromiscLevel { + /// The device should receive only packets for its installed MAC + /// filters. + /// + /// Today this allows solely [`PciVirtioViona::mac_addr`]. + #[default] + None, + /// The device should receive all multicast traffic in addition + /// to its registered unicast filters. + AllMulti, + /// The device should receive all packets. + All, + #[cfg(feature = "falcon")] + /// The device should receive all packets, including VLAN tags. + /// + /// This suggests a bug in viona: when `VIRTIO_NET_F_CTRL_VLAN` is + /// not negotiated, the device _should_ accept all VLAN-tagged frames. + /// + /// This mechanism is only present on the branch + /// oxidecomputer/illumos-gate/viona_vlans. If the OS supports this, + /// then we must remain in this mode if set to ensure hosts receive + /// the correct traffic. + AllVlan, +} + +impl From for usize { + fn from(value: PromiscLevel) -> Self { + (match value { + PromiscLevel::None => viona_api::VIONA_PROMISC_NONE, + PromiscLevel::AllMulti => viona_api::VIONA_PROMISC_MULTI, + PromiscLevel::All => viona_api::VIONA_PROMISC_ALL, + #[cfg(feature = "falcon")] + PromiscLevel::AllVlan => viona_api::VIONA_PROMISC_ALL_VLAN, + }) as usize + } +} + /// Represents a connection to the kernel's Viona (VirtIO Network Adapter) /// driver. pub struct PciVirtioViona { @@ -339,13 +475,23 @@ impl PciVirtioViona { let info = dlhdl.query_link(vnic_name)?; let hdl = VionaHdl::new(info.link_id, vm.fd())?; + // Viona is configured in all-multicast mode by default. We'll downgrade + // this later if the guest supports `VIRTIO_NET_F_CTRL_RX`. + #[cfg(not(feature = "falcon"))] + let promisc_level = PromiscLevel::AllMulti; #[cfg(feature = "falcon")] - if let Err(e) = hdl.set_promisc(viona_api::VIONA_PROMISC_ALL_VLAN) { - // Until/unless this support is integrated into stlouis/illumos, - // this is an expected failure. This is needed to use vlans, - // but shouldn't affect any other use case. - eprintln!("failed to enable promisc mode on {vnic_name}: {e:?}"); - } + let promisc_level = match hdl.set_promisc(PromiscLevel::AllVlan) { + Ok(()) => PromiscLevel::AllVlan, + Err(e) => { + // Until/unless this support is integrated into stlouis/illumos, + // this is an expected failure. This is needed to use vlans, + // but shouldn't affect any other use case. + eprintln!( + "failed to enable promisc mode on {vnic_name}: {e:?}" + ); + PromiscLevel::AllMulti + } + }; if let Some(vp) = viona_params { vp.set(&hdl)?; @@ -397,7 +543,7 @@ impl PciVirtioViona { mac_addr: [0; ETHERADDRL], mtu: info.mtu, hdl, - inner: Mutex::new(Inner::new(nqueues)), + inner: Mutex::new(Inner::new(nqueues, promisc_level)), }; this.mac_addr.copy_from_slice(&info.mac_addr); let this = Arc::new(this); @@ -464,8 +610,8 @@ impl PciVirtioViona { } use control::Command; match Command::try_from(header).map_err(|_| ())? { - Command::Rx(cmd) => self.ctl_rx(cmd, vq, chain, mem), - Command::Mac(cmd) => self.ctl_mac(cmd, vq, chain, mem), + Command::Rx(cmd) => self.ctl_rx(cmd, chain, mem), + Command::Mac(cmd) => self.ctl_mac(cmd, chain, mem), Command::Vlan(_) => Ok(()), Command::Announce(_) => Ok(()), Command::Mq(cmd) => self.ctl_mq(cmd, vq, chain, mem), @@ -475,23 +621,48 @@ impl PciVirtioViona { fn ctl_rx( &self, cmd: control::RxCmd, - vq: &VirtQueue, chain: &mut Chain, mem: &MemCtx, ) -> Result<(), ()> { - let _todo = (cmd, vq, chain, mem); - Err(()) + use control::RxCmd; + let filter = match cmd { + RxCmd::Promisc => FilterState::PROMISCUOUS, + RxCmd::AllMulticast => FilterState::ALL_MULTICAST, + RxCmd::AllUnicast => FilterState::ALL_UNICAST, + RxCmd::NoMulticast => FilterState::NO_MULTICAST, + RxCmd::NoUnicast => FilterState::NO_UNICAST, + RxCmd::NoBroadcast => FilterState::NO_BROADCAST, + }; + + let mut msg = control::Rx::default(); + if !chain.read(&mut msg, &mem) { + return Err(()); + } + let active = match msg.set { + 0 => false, + 1 => true, + _ => return Err(()), + }; + self.set_filter_state(filter, active) } fn ctl_mac( &self, cmd: control::MacCmd, - vq: &VirtQueue, chain: &mut Chain, mem: &MemCtx, ) -> Result<(), ()> { - let _todo = (cmd, vq, chain, mem); - Err(()) + use control::MacCmd; + match cmd { + MacCmd::TableSet => { + let unicast = control::read_mac_list(chain, mem)?; + let multicast = control::read_mac_list(chain, mem)?; + + self.set_mac_filters(unicast, multicast) + } + // We do not advertise `VIRTIO_NET_F_CTRL_MAC_ADDR` + MacCmd::AddrSet => return Err(()), + } } fn set_use_pairs(&self, requested: u16) -> Result<(), ()> { @@ -711,13 +882,125 @@ impl PciVirtioViona { let _ = self.hdl.set_notify_mmio_addr(state.notify_mmio_addr); } } + + /// Set or unset unicast/multicast filters on behalf of a driver. + fn set_filter_state( + &self, + filter: FilterState, + active: bool, + ) -> Result<(), ()> { + if (self.virtio_state.negotiated_features() & VIRTIO_NET_F_CTRL_RX) == 0 + && filter.intersects(FilterState::RX_CMDS) + { + return Err(()); + } + + if filter.intersects(FilterState::RX_EXTRA_CMDS) { + // We cannot express any of the extra filters within viona yet. + return Err(()); + } + + let mut state = self.inner.lock().unwrap(); + let old_filter = state.filter; + state.filter.set(filter, active); + + match self.set_promisc(self.needed_promisc(&state), &mut state) { + Ok(()) => Ok(()), + Err(()) => { + state.filter = old_filter; + Err(()) + } + } + } + + /// Replace + fn set_mac_filters( + &self, + mut unicast: Box<[[u8; ETHERADDRL]]>, + mut multicast: Box<[[u8; ETHERADDRL]]>, + ) -> Result<(), ()> { + if (self.virtio_state.negotiated_features() & VIRTIO_NET_F_CTRL_RX) == 0 + { + return Err(()); + } + + let mut state = self.inner.lock().unwrap(); + + std::mem::swap(&mut unicast, &mut state.unicast_mac_filters); + std::mem::swap(&mut multicast, &mut state.multicast_mac_filters); + + match self.set_promisc(self.needed_promisc(&state), &mut state) { + Ok(()) => Ok(()), + Err(()) => { + state.unicast_mac_filters = unicast; + state.multicast_mac_filters = multicast; + Err(()) + } + } + } + + /// Update the promisc level of the device. + fn set_promisc( + &self, + level: PromiscLevel, + state: &mut Inner, + ) -> Result<(), ()> { + if level == state.promisc { + return Ok(()); + } + + match self.hdl.set_promisc(level) { + Ok(_) => { + state.promisc = level; + Ok(()) + } + Err(_) => Err(()), + } + } + + /// Compute whether the driver requires us to move into promiscuous mode + /// based on its MAC filters and explicit filter mode. + fn needed_promisc(&self, state: &Inner) -> PromiscLevel { + // The VLAN tag workaround, if requested, always wins and cannot + // be downgraded. + #[cfg(feature = "falcon")] + if state.promisc == PromiscLevel::AllVlan { + return PromiscLevel::AllVlan; + } + + let need_mcast = state.filter.contains(FilterState::ALL_MULTICAST) + || !state.multicast_mac_filters.is_empty(); + + // Don't inflict promiscuous mode on drivers which request only their + // own MAC address. + let filter_is_self = state + .unicast_mac_filters + .get(0) + .map(|mac| mac == &self.mac_addr) + .unwrap_or_default(); + let need_promisc = state.filter.contains(FilterState::PROMISCUOUS) + || !(filter_is_self || state.unicast_mac_filters.is_empty()); + + if need_promisc { + PromiscLevel::All + } else if need_mcast { + PromiscLevel::AllMulti + } else { + PromiscLevel::None + } + } } impl VirtioDevice for PciVirtioViona { fn rw_dev_config(&self, mut rwo: RWOp) { NET_DEV_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => self.net_cfg_read(id, ro), RWOp::Write(_) => { - //ignore writes + // Ignore writes. + // + // Technically while we are in either `Mode::Transitional` + // or `Mode::Legacy` the driver may write to `NetReg::Mac` + // in lieu of sending a `MacCmd::AddrSet`. We don't support + // changing the MAC address today. } }); } @@ -729,6 +1012,7 @@ impl VirtioDevice for PciVirtioViona { let mut feat = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ + | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_MQ; // We drop the "VIRTIO_NET_F_MTU" flag from feat if we are unable to // query it. This can happen when executing within a non-global Zone. @@ -752,6 +1036,10 @@ impl VirtioDevice for PciVirtioViona { )); self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?; } + if (feat & VIRTIO_NET_F_CTRL_RX) != 0 { + let mut state = self.inner.lock().unwrap(); + self.set_promisc(PromiscLevel::None, &mut state)?; + } Ok(()) } @@ -848,6 +1136,13 @@ impl Lifecycle for PciVirtioViona { self.set_use_pairs(1).expect("can set viona back to one queue pair"); self.hdl.set_pairs(1).expect("can set viona back to one queue pair"); self.virtio_state.queues.reset_peak(); + + let mut state = self.inner.lock().unwrap(); + state.unicast_mac_filters = Box::new([]); + state.multicast_mac_filters = Box::new([]); + if let Err(_) = self.set_promisc(PromiscLevel::AllMulti, &mut state) { + eprintln!("failed to reset viona promiscuous state") + } } fn start(&self) -> anyhow::Result<()> { self.run(); @@ -1210,11 +1505,9 @@ impl VionaHdl { Ok(()) } - /// /// Set the desired promiscuity level on this interface. - #[cfg(feature = "falcon")] - fn set_promisc(&self, p: i32) -> io::Result<()> { - self.0.ioctl_usize(viona_api::VNA_IOC_SET_PROMISC, p as usize)?; + fn set_promisc(&self, p: PromiscLevel) -> io::Result<()> { + self.0.ioctl_usize(viona_api::VNA_IOC_SET_PROMISC, usize::from(p))?; Ok(()) } } From a2e3b1128f00234e943822fea0b76e92e2e2805b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 24 Apr 2026 17:54:51 +0100 Subject: [PATCH 2/8] Actually get it all working. Well, that was an adventure. Closes 1127. --- lib/propolis/src/hw/virtio/bits.rs | 1 + lib/propolis/src/hw/virtio/queue.rs | 12 ++- lib/propolis/src/hw/virtio/viona.rs | 160 +++++++++++++++++++--------- 3 files changed, 119 insertions(+), 54 deletions(-) diff --git a/lib/propolis/src/hw/virtio/bits.rs b/lib/propolis/src/hw/virtio/bits.rs index b78d577f4..a7372886a 100644 --- a/lib/propolis/src/hw/virtio/bits.rs +++ b/lib/propolis/src/hw/virtio/bits.rs @@ -22,6 +22,7 @@ pub const VIRTIO_NET_F_CTRL_VQ: u64 = 1 << 17; pub const VIRTIO_NET_F_CTRL_RX: u64 = 1 << 18; pub const VIRTIO_NET_F_CTRL_VLAN: u64 = 1 << 19; pub const VIRTIO_NET_F_CTRL_RX_EXTRA: u64 = 1 << 20; +pub const VIRTIO_NET_F_GUEST_ANNOUNCE: u64 = 1 << 21; pub const VIRTIO_NET_F_MQ: u64 = 1 << 22; // virtio-block feature bits diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs index 6a73bf8d5..8c975fd76 100644 --- a/lib/propolis/src/hw/virtio/queue.rs +++ b/lib/propolis/src/hw/virtio/queue.rs @@ -192,10 +192,11 @@ impl VqUsed { mem.write(self.gpa_flags, &value); } - /// Disables notifications on this queue; returns the previous state. + /// Disables notifications on this queue; returns whether notfications were + /// enabled before. fn disable_notify(&self, mem: &MemCtx) -> bool { let flags = self.flags(mem); - let current = flags.contains(UsedFlags::NO_NOTIFY); + let current = !flags.contains(UsedFlags::NO_NOTIFY); self.set_flags(flags | UsedFlags::NO_NOTIFY, mem); current } @@ -326,6 +327,7 @@ impl VirtQueue { used.reset(); self.live.store(false, Ordering::Release); self.enabled.store(false, Ordering::Release); + self.is_control.store(false, Ordering::Release); } pub(super) fn enable(&self) { @@ -564,7 +566,9 @@ impl VirtQueue { used.interrupt.as_ref().map(|x| x.read()) } - /// Disables interrupts (notifications) on the `Used` ring + /// Disables interrupts (notifications) on the `Used` ring. + /// + /// Returns `true` if notifications were previously enabled. pub(super) fn disable_intr(&self, mem: &MemCtx) -> bool { let used = self.used.lock().unwrap(); used.disable_notify(mem) @@ -1052,7 +1056,7 @@ impl VirtQueues { // section 5.1.2). None of the other devices currently handle queues // specially in this way, but we should come up with some better // mechanism here. - if qid + 1 == len { + if qid + 1 == self.max_capacity() { Some(self.get_control()) } else { self.queues[..len].get(qid) diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index bd0f534a5..b8ae8ba53 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -49,6 +49,13 @@ pub const fn max_num_queues() -> usize { PROPOLIS_MAX_MQ_PAIRS as usize * 2 } +/// The index of the control queue when multiqueue ([`VIRTIO_NET_F_MQ`]) has +/// not been negotiated. +/// +/// In this case, the driver will behave as though we have allocated only one +/// Rx/Tx queue pair, followed by the control queue. +pub const VIRTIO_NO_MQ_CTRL_Q_INDEX: usize = 2; + const ETHERADDRL: usize = 6; /// The caller of `set_use_pairs` will probably be inlined into a larger @@ -64,25 +71,27 @@ enum MqSetPairsCause { #[usdt::provider(provider = "propolis")] mod probes { fn virtio_viona_mq_set_use_pairs(cause: u8, npairs: u16) {} + fn virtio_viona_cq_request(class: u8, command: u8) {} } /// Types and so forth for supporting the control queue. /// Note that these come from the VirtIO spec, section /// 5.1.6.2 in VirtIO 1.2. pub mod control { - use super::ETHERADDRL; + use super::MacAddr; use std::convert::TryFrom; + use zerocopy::{FromBytes, IntoBytes}; /// The control message header has two data: a u8 representing the "class" /// of control message, which describes what the message applies to, and a /// "command", which describes what action we should take in response to the /// command. So for example, class Mq and command Set means to set the /// number of multiqueue queue pairs. - #[derive(Clone, Copy, Debug, Default)] + #[derive(Clone, Copy, Debug, Default, FromBytes)] #[repr(C)] pub struct Header { - class: u8, - command: u8, + pub class: u8, + pub command: u8, } #[derive(Clone, Copy, Debug)] @@ -110,7 +119,8 @@ pub mod control { } } - #[derive(Clone, Copy, Debug)] + #[derive(Clone, Copy, Debug, IntoBytes)] + #[repr(u8)] pub enum Ack { Ok = 0, Err = 1, @@ -128,7 +138,7 @@ pub mod control { } /// The payload accompanying an [`RxCmd`]. - #[derive(Clone, Copy, Debug, Default)] + #[derive(Clone, Copy, Debug, Default, FromBytes)] #[repr(C)] pub struct Rx { /// Whether the flag signalled by the command should be enabled (`1`) @@ -143,14 +153,7 @@ pub mod control { AddrSet = 1, } - #[derive(Clone, Copy, Debug, Default)] - #[repr(C)] - pub struct Mac { - entries: u32, - mac: [u8; ETHERADDRL], - } - - #[derive(Clone, Copy, Debug, Default)] + #[derive(Clone, Copy, Debug, Default, FromBytes)] #[repr(C)] pub struct Mq { pub npairs: u16, @@ -193,7 +196,7 @@ pub mod control { pub(super) fn read_mac_list( chain: &mut super::Chain, mem: &super::MemCtx, - ) -> Result, ()> { + ) -> Result, ()> { let mut entry_count = 0u32; if !chain.read(&mut entry_count, mem) { return Err(()); @@ -204,7 +207,7 @@ pub mod control { } let mut space = - vec![[0; ETHERADDRL]; entry_count as usize].into_boxed_slice(); + vec![MacAddr::default(); entry_count as usize].into_boxed_slice(); for i in 0..space.len() { if !chain.read(&mut space[i], mem) { return Err(()); @@ -308,8 +311,8 @@ struct Inner { promisc: PromiscLevel, filter: FilterState, - unicast_mac_filters: Box<[[u8; ETHERADDRL]]>, - multicast_mac_filters: Box<[[u8; ETHERADDRL]]>, + unicast_mac_filters: Box<[MacAddr]>, + multicast_mac_filters: Box<[MacAddr]>, } impl Inner { fn new(max_queues: usize, promisc: PromiscLevel) -> Self { @@ -433,6 +436,34 @@ impl From for usize { } } +/// A MAC address. +#[derive( + Copy, + Clone, + Debug, + Default, + Eq, + PartialEq, + Ord, + PartialOrd, + FromBytes, + IntoBytes, + Immutable, +)] +pub struct MacAddr([u8; ETHERADDRL]); + +impl From<[u8; ETHERADDRL]> for MacAddr { + fn from(value: [u8; ETHERADDRL]) -> Self { + Self(value) + } +} + +impl MacAddr { + pub const fn is_unicast(&self) -> bool { + (self.0[0] & 0b1) == 0 + } +} + /// Represents a connection to the kernel's Viona (VirtIO Network Adapter) /// driver. pub struct PciVirtioViona { @@ -441,7 +472,7 @@ pub struct PciVirtioViona { indicator: lifecycle::Indicator, dev_features: u64, - mac_addr: [u8; ETHERADDRL], + mac_addr: MacAddr, mtu: Option, hdl: VionaHdl, inner: Mutex, @@ -517,13 +548,14 @@ impl PciVirtioViona { .chain([ctl_queue_size]) .collect::>(); // The vector is sized with the maximum number of rings/queues, but - // until the driver negotiates multiqueue, we only use the first two. + // until the driver negotiates multiqueue, we only use the first two + // for the datapath. The third will serve as the control queue if + // multiqueue is not negotiated, even if it is a little large for that + // purpose. let queues = VirtQueues::new_with_len(3, &queue_sizes); - if let Some(ctlq) = queues.get(2) { - ctlq.set_control(); - } let nqueues = queues.max_capacity(); hdl.set_pairs(1).unwrap(); + // Add one for config space. let msix_count = Some(1 + nqueues as u16); let (virtio_state, pci_state) = PciVirtioState::new( @@ -535,17 +567,16 @@ impl PciVirtioViona { ); let dev_features = hdl.get_avail_features()?; - let mut this = PciVirtioViona { + let this = PciVirtioViona { virtio_state, pci_state, indicator: Default::default(), dev_features, - mac_addr: [0; ETHERADDRL], + mac_addr: info.mac_addr.into(), mtu: info.mtu, hdl, inner: Mutex::new(Inner::new(nqueues, promisc_level)), }; - this.mac_addr.copy_from_slice(&info.mac_addr); let this = Arc::new(this); // Spawn the interrupt poller @@ -574,17 +605,13 @@ impl PciVirtioViona { } } - fn is_ctl_queue(&self, vq: &VirtQueue) -> bool { - usize::from(vq.id) + 1 == self.virtio_state.queues.len() - } - fn ctl_queue_notify(&self, vq: &VirtQueue) { if let Some(mem) = self.pci_state.acc_mem.access() { while !vq.avail_is_empty(&mem) { let mut chain = Chain::with_capacity(4); let intrs_en = vq.disable_intr(&mem); while let Some((_idx, _len)) = vq.pop_avail(&mut chain, &mem) { - let res = match self.ctl_msg(vq, &mut chain, &mem) { + let res = match self.ctl_msg(&mut chain, &mem) { Ok(_) => control::Ack::Ok, Err(_) => control::Ack::Err, } as u8; @@ -598,23 +625,22 @@ impl PciVirtioViona { } } - fn ctl_msg( - &self, - vq: &VirtQueue, - chain: &mut Chain, - mem: &MemCtx, - ) -> Result<(), ()> { + fn ctl_msg(&self, chain: &mut Chain, mem: &MemCtx) -> Result<(), ()> { let mut header = control::Header::default(); if !chain.read(&mut header, &mem) { return Err(()); } + probes::virtio_viona_cq_request!(|| (header.class, header.command)); + use control::Command; match Command::try_from(header).map_err(|_| ())? { Command::Rx(cmd) => self.ctl_rx(cmd, chain, mem), Command::Mac(cmd) => self.ctl_mac(cmd, chain, mem), - Command::Vlan(_) => Ok(()), - Command::Announce(_) => Ok(()), - Command::Mq(cmd) => self.ctl_mq(cmd, vq, chain, mem), + // We do not yet advertise `VIRTIO_NET_F_CTRL_VLAN`. + Command::Vlan(_) => Err(()), + // We do not yet advertise `VIRTIO_NET_F_GUEST_ANNOUNCE` + Command::Announce(_) => Err(()), + Command::Mq(cmd) => self.ctl_mq(cmd, chain, mem), } } @@ -684,18 +710,17 @@ impl PciVirtioViona { fn ctl_mq( &self, cmd: control::MqCmd, - vq: &VirtQueue, chain: &mut Chain, mem: &MemCtx, ) -> Result<(), ()> { use control::MqCmd; - let _todo = vq; match cmd { MqCmd::SetPairs => { let mut msg = control::Mq::default(); if !chain.read(&mut msg, &mem) { return Err(()); } + let npairs = msg.npairs; probes::virtio_viona_mq_set_use_pairs!(|| ( MqSetPairsCause::Commanded as u8, @@ -710,7 +735,7 @@ impl PciVirtioViona { fn net_cfg_read(&self, id: &NetReg, ro: &mut ReadOp) { match id { - NetReg::Mac => ro.write_bytes(&self.mac_addr), + NetReg::Mac => ro.write_bytes(&self.mac_addr.0), NetReg::Status => { // Always report link up ro.write_u16(VIRTIO_NET_S_LINK_UP); @@ -883,7 +908,7 @@ impl PciVirtioViona { } } - /// Set or unset unicast/multicast filters on behalf of a driver. + /// Set or unset unicast/multicast class-wide filters on behalf of a driver. fn set_filter_state( &self, filter: FilterState, @@ -913,17 +938,24 @@ impl PciVirtioViona { } } - /// Replace + /// Replace the requested set of explicit MAC address filters on a device + /// with a new table provided by the driver. fn set_mac_filters( &self, - mut unicast: Box<[[u8; ETHERADDRL]]>, - mut multicast: Box<[[u8; ETHERADDRL]]>, + mut unicast: Box<[MacAddr]>, + mut multicast: Box<[MacAddr]>, ) -> Result<(), ()> { if (self.virtio_state.negotiated_features() & VIRTIO_NET_F_CTRL_RX) == 0 { return Err(()); } + if unicast.iter().any(|v| !v.is_unicast()) + || multicast.iter().any(|v| v.is_unicast()) + { + return Err(()); + } + let mut state = self.inner.lock().unwrap(); std::mem::swap(&mut unicast, &mut state.unicast_mac_filters); @@ -968,6 +1000,9 @@ impl PciVirtioViona { return PromiscLevel::AllVlan; } + // We don't have an ioctl yet for viona to explicitly install a set of + // filters. For now, we need to apply some level of promiscuous mode + // to give the guest what it asks for. let need_mcast = state.filter.contains(FilterState::ALL_MULTICAST) || !state.multicast_mac_filters.is_empty(); @@ -1028,23 +1063,47 @@ impl VirtioDevice for PciVirtioViona { fn set_features(&self, feat: u64) -> Result<(), ()> { self.hdl.set_features(feat).map_err(|_| ())?; - if (feat & VIRTIO_NET_F_MQ) != 0 { + + // Any remaining setup is for control-queue based features. + if (feat & VIRTIO_NET_F_CTRL_VQ) == 0 { + return Ok(()); + } + + if self.virtio_state.queues.max_capacity() < 3 { + // Since we're advertising control queue support, we need + // one Rx, Tx, and CtlQ at the minimum. + return Err(()); + } + + let ctl_q_idx = if (feat & VIRTIO_NET_F_MQ) != 0 { self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).map_err(|_| ())?; probes::virtio_viona_mq_set_use_pairs!(|| ( MqSetPairsCause::MqEnabled as u8, PROPOLIS_MAX_MQ_PAIRS )); self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?; - } + self.virtio_state.queues.max_capacity() - 1 + } else { + VIRTIO_NO_MQ_CTRL_Q_INDEX + }; + + ctl_q_idx + .try_into() + .ok() + .and_then(|i| self.virtio_state.queues.get(i)) + .map(|v| v.set_control()) + .ok_or_else(|| ())?; + if (feat & VIRTIO_NET_F_CTRL_RX) != 0 { let mut state = self.inner.lock().unwrap(); self.set_promisc(PromiscLevel::None, &mut state)?; } + Ok(()) } fn queue_notify(&self, vq: &VirtQueue) { - if self.is_ctl_queue(vq) { + if vq.is_control() { self.ctl_queue_notify(vq); return; } @@ -1714,6 +1773,7 @@ pub(crate) mod bits { pub const VIRTIO_NET_CFG_SIZE: usize = 6 + 2 + 2 + 2 + 4 + 1 + 1 + 2 + 4; } use bits::*; +use zerocopy::{FromBytes, Immutable, IntoBytes}; /// Check that available viona API matches expectations of propolis crate pub(crate) fn check_api_version() -> Result<(), crate::api_version::Error> { From 0d57644d128aba89284c496c5d4c3a9934e0d38d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 28 Apr 2026 12:41:13 +0100 Subject: [PATCH 3/8] Actually move filter tables and associated state during migration. --- lib/propolis/src/hw/virtio/viona.rs | 56 ++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index b8ae8ba53..1088e1fc1 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -28,6 +28,7 @@ use super::{VirtioDevice, VqChange, VqIntr}; use bit_field::BitField; use lazy_static::lazy_static; +use serde::{Deserialize, Serialize}; use tokio::io::unix::AsyncFd; use tokio::io::Interest; use tokio::sync::watch; @@ -218,6 +219,25 @@ pub mod control { } } +pub mod migrate { + use super::*; + use crate::migrate::*; + + #[derive(Deserialize, Serialize)] + pub struct VionaStateV1 { + pub promisc: PromiscLevel, + pub filter: u8, + pub unicast_mac_filters: Vec, + pub multicast_mac_filters: Vec, + } + + impl Schema<'_> for VionaStateV1 { + fn id() -> SchemaId { + ("pci-virtio-viona", 1) + } + } +} + /// Viona's in-kernel emulation of the device VirtQueues is performed in what /// are calls "vrings". Since the userspace portion of the Viona emulation is /// tasked with keeping the vring state in sync with the VirtQueue it @@ -398,8 +418,10 @@ impl Default for DeviceParams { } /// Relaxed levels of packet filtering offered by viona. -#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -enum PromiscLevel { +#[derive( + Copy, Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, +)] +pub enum PromiscLevel { /// The device should receive only packets for its installed MAC /// filters. /// @@ -449,6 +471,8 @@ impl From for usize { FromBytes, IntoBytes, Immutable, + Serialize, + Deserialize, )] pub struct MacAddr([u8; ETHERADDRL]); @@ -1274,7 +1298,20 @@ impl MigrateMulti for PciVirtioViona { output: &mut PayloadOutputs, ctx: &MigrateCtx, ) -> Result<(), MigrateStateError> { - ::export(self, output, ctx) + ::export(self, output, ctx)?; + + let viona_state = { + let state = self.inner.lock().unwrap(); + + migrate::VionaStateV1 { + promisc: state.promisc, + filter: state.filter.bits(), + unicast_mac_filters: state.unicast_mac_filters.to_vec(), + multicast_mac_filters: state.multicast_mac_filters.to_vec(), + } + }; + + output.push(viona_state.into()) } fn import( @@ -1291,7 +1328,18 @@ impl MigrateMulti for PciVirtioViona { )) })?; - Ok(()) + let input: migrate::VionaStateV1 = offer.take()?; + + let mut state = self.inner.lock().unwrap(); + state.filter = FilterState::from_bits_retain(input.filter); + state.unicast_mac_filters = input.unicast_mac_filters.into(); + state.unicast_mac_filters = input.multicast_mac_filters.into(); + self.set_promisc(input.promisc, &mut state).map_err(|_| { + MigrateStateError::ImportFailed(format!( + "Could not move device promisc level to {:?}.", + input.promisc + )) + }) } } From 33aa6b27e66d68c5f1b256ba4764913734dd1e07 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 22 May 2026 20:39:11 +0100 Subject: [PATCH 4/8] Review feedback. --- lib/propolis/src/hw/virtio/queue.rs | 108 +++++++++++++++++++++++----- lib/propolis/src/hw/virtio/viona.rs | 88 +++++++++++++---------- 2 files changed, 139 insertions(+), 57 deletions(-) diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs index 706494e2d..66dc73da5 100644 --- a/lib/propolis/src/hw/virtio/queue.rs +++ b/lib/propolis/src/hw/virtio/queue.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use std::io::Error as IoError; use std::mem; use std::num::{NonZeroU16, Wrapping}; use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering}; @@ -350,8 +351,8 @@ impl VirtQueue { self.is_control.load(Ordering::Acquire) } - pub(super) fn set_control(&self) { - self.is_control.store(true, Ordering::Release); + fn set_control(&self, val: bool) { + self.is_control.store(val, Ordering::Release); } #[inline(always)] @@ -790,6 +791,66 @@ impl Chain { }); total == item_sz } + + pub fn read_many_owned( + &mut self, + mem: &MemCtx, + n_elements: usize, + ) -> Result, IoError> { + let required_sz = mem::size_of::() * n_elements; + if (self.read_stat.bytes_remain as usize) < required_sz { + return Err(IoError::new( + std::io::ErrorKind::InvalidData, + "Buffer too small", + )); + } + + let mut vec = Vec::with_capacity(n_elements); + + if n_elements == 0 { + return Ok(vec); + } + + // SAFETY: a `u8` has no alignment requirement so it is safe to + // construct a byte slice from another Vec. These bytes will not be read + // before initialisation. + let raw = unsafe { + std::slice::from_raw_parts_mut( + vec.spare_capacity_mut().as_ptr() as *mut u8, + required_sz, + ) + }; + + let mut done = 0; + let total = self.for_remaining_type(true, |addr, len| { + let mut remain = GuestData::from(&mut raw[done..]); + if let Some(copied) = mem.read_into(addr, &mut remain, len) { + let need_more = copied != remain.len(); + + done += copied; + (copied, need_more) + } else { + // Copy failed, so do not attempt anything else + (0, false) + } + }); + + if total == required_sz { + // SAFETY: read_many() was successful and just initialized all + // `n_elements` of the vector. + unsafe { + vec.set_len(n_elements); + } + + Ok(vec) + } else { + Err(IoError::new( + std::io::ErrorKind::UnexpectedEof, + "Insufficient bytes to complete read", + )) + } + } + /// Fetch a string of readable guest regions from the chain, provided there /// are enough to cover a specified length. pub fn readable_bufs(&mut self, len: usize) -> Option> { @@ -812,6 +873,7 @@ impl Chain { assert_eq!(remain, 0); Some(bufs) } + pub fn write( &mut self, item: &T, @@ -1034,6 +1096,17 @@ impl VirtQueues { Ok(()) } + pub fn set_ctl_queues(&self, indices: &[u16]) -> Result<(), ()> { + for vq in &self.queues { + vq.set_control(false); + } + for i in indices { + let vq = self.queues.get(usize::from(*i)).ok_or(())?; + vq.set_control(true); + } + Ok(()) + } + pub fn count(&self) -> NonZeroU16 { NonZeroU16::try_from(self.len() as u16) .expect("queue count already validated") @@ -1059,25 +1132,19 @@ impl VirtQueues { pub fn get(&self, qid: u16) -> Option<&Arc> { let len = self.len(); let qid = usize::from(qid); - // XXX: This special case is for the virtio network device, which always - // puts the control queue at the end of queue vector (see VirtIO 1.2 - // section 5.1.2). None of the other devices currently handle queues - // specially in this way, but we should come up with some better - // mechanism here. - if qid + 1 == self.max_capacity() { - Some(self.get_control()) - } else { - self.queues[..len].get(qid) - } - } - fn get_control(&self) -> &Arc { - &self.queues[self.max_capacity() - 1] + // Control queues may be placed almost arbitrarily depending on the + // device type -- they may be mixed in with other queues, or placed at + // the very end after preallocated but unused queues. + self.queues.get(qid).filter(|v| v.is_control() || qid < len) } pub fn iter(&self) -> impl std::iter::Iterator> { - let len = self.len() - 1; - self.queues[..len].iter().chain([self.get_control()]) + let len = self.len(); + self.queues + .iter() + .enumerate() + .filter_map(move |(i, v)| (i < len || v.is_control()).then_some(v)) } /// Iterate all queues the device may have used; the current number of @@ -1085,8 +1152,11 @@ impl VirtQueues { /// like device reset and teardown we must manage all viona rings /// corresponding to ever-active VirtQueues. pub fn iter_all(&self) -> impl std::iter::Iterator> { - let peak = self.peak() - 1; - self.queues[..peak].iter().chain([self.get_control()]) + let peak = self.peak(); + self.queues + .iter() + .enumerate() + .filter_map(move |(i, v)| (i < peak || v.is_control()).then_some(v)) } pub fn export(&self) -> migrate::VirtQueuesV1 { diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 59ab27abb..040267059 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -33,6 +33,7 @@ use tokio::io::unix::AsyncFd; use tokio::io::Interest; use tokio::sync::watch; use tokio::task::JoinHandle; +use zerocopy::{FromBytes, Immutable, IntoBytes}; // Re-export API versioning interface for convenience of propolis consumers pub use viona_api::{api_version, ApiVersion}; @@ -204,19 +205,10 @@ pub mod control { return Err(()); } - if entry_count == 0 { - return Ok(Box::new([])); - } - - let mut space = - vec![MacAddr::default(); entry_count as usize].into_boxed_slice(); - for i in 0..space.len() { - if !chain.read(&mut space[i], mem) { - return Err(()); - } - } - - Ok(space) + chain + .read_many_owned(mem, entry_count as usize) + .map_err(|_| ()) + .map(Vec::into_boxed_slice) } } @@ -280,10 +272,17 @@ enum VRingState { bitflags! { /// Packet receive filters requested by a virtio NIC driver. + /// + /// Each filter requested by the guest is a discrete value of + /// [`control::RxCmd`], configured with a binary state (on/off). The device + /// is responsible for determining which traffic should be allowed and + /// filtered based on how these flags take priority over one another. We + /// represent this as a bitfield for simplicity. #[derive(Copy, Clone, Debug)] struct FilterState: u8 { - /// The driver has requested that we enter promiscuous mode, and filter no - /// packets based on MAC address. This supersedes all other active flags. + /// The driver has requested that we enter promiscuous mode, and filter + /// no packets based on MAC address. This supersedes all other active + /// flags. /// /// Requires [`VIRTIO_NET_F_CTRL_RX`]. const PROMISCUOUS = 1 << 0; @@ -297,13 +296,13 @@ bitflags! { /// /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. const ALL_UNICAST = 1 << 2; - /// The driver has requested that we drop all multicast packets, except from - /// broadcast frames. This supersedes [`Self::ALL_MULTICAST`]. + /// The driver has requested that we drop all multicast packets, except + /// from broadcast frames. This supersedes [`Self::ALL_MULTICAST`]. /// /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. const NO_MULTICAST = 1 << 3; - /// The driver has requested that we drop all unicast packets, except from - /// broadcast frames. This supersedes [`Self::ALL_UNICAST`]. + /// The driver has requested that we drop all unicast packets, except + /// from broadcast frames. This supersedes [`Self::ALL_UNICAST`]. /// /// Requires [`VIRTIO_NET_F_CTRL_RX_EXTRA`]. const NO_UNICAST = 1 << 4; @@ -574,10 +573,11 @@ impl PciVirtioViona { .collect::>(); // The vector is sized with the maximum number of rings/queues, but // until the driver negotiates multiqueue, we only use the first two - // for the datapath. The third will serve as the control queue if + // for the datapath. `queue_sizes` always contains at least three + // elements -- the third will serve as the control queue if // multiqueue is not negotiated, even if it is a little large for that // purpose. - let queues = VirtQueues::new_with_len(3, &queue_sizes); + let queues = VirtQueues::new_with_len(2, &queue_sizes); let nqueues = queues.max_capacity(); hdl.set_pairs(1).unwrap(); @@ -725,10 +725,7 @@ impl PciVirtioViona { return Ok(()); } self.hdl.set_usepairs(requested).unwrap(); - self.virtio_state - .queues - .set_len(npairs * 2 + 1) - .expect("num queue pairs"); + self.virtio_state.queues.set_len(npairs * 2).expect("num queue pairs"); Ok(()) } @@ -1011,7 +1008,13 @@ impl PciVirtioViona { state.promisc = level; Ok(()) } - Err(_) => Err(()), + Err(_) => { + // Ensure that we return to the old level of promisc. + if self.hdl.set_promisc(state.promisc).is_err() { + self.virtio_state.set_needs_reset(self); + } + Err(()) + }, } } @@ -1025,10 +1028,18 @@ impl PciVirtioViona { return PromiscLevel::AllVlan; } + // We don't yet have any mechanism to account for ALL_UNICAST or + // related filters from VIRTIO_NET_F_CTRL_RX_EXTRA, and the guest will + // not request them. + // We don't have an ioctl yet for viona to explicitly install a set of // filters. For now, we need to apply some level of promiscuous mode // to give the guest what it asks for. - let need_mcast = state.filter.contains(FilterState::ALL_MULTICAST) + // + // Even though the guest can't yet use its parent feature, we can still + // account for NO_MULTICAST here. + let need_mcast = (state.filter.contains(FilterState::ALL_MULTICAST) + && !state.filter.contains(FilterState::NO_MULTICAST)) || !state.multicast_mac_filters.is_empty(); // Don't inflict promiscuous mode on drivers which request only their @@ -1112,12 +1123,9 @@ impl VirtioDevice for PciVirtioViona { VIRTIO_NO_MQ_CTRL_Q_INDEX }; - ctl_q_idx + self.virtio_state.queues.set_ctl_queues(&[ctl_q_idx .try_into() - .ok() - .and_then(|i| self.virtio_state.queues.get(i)) - .map(|v| v.set_control()) - .ok_or_else(|| ())?; + .expect("queue index must be a valid u16")])?; if (feat & VIRTIO_NET_F_CTRL_RX) != 0 { let mut state = self.inner.lock().unwrap(); @@ -1224,9 +1232,8 @@ impl Lifecycle for PciVirtioViona { let mut state = self.inner.lock().unwrap(); state.unicast_mac_filters = Box::new([]); state.multicast_mac_filters = Box::new([]); - if let Err(_) = self.set_promisc(PromiscLevel::AllMulti, &mut state) { - eprintln!("failed to reset viona promiscuous state") - } + self.set_promisc(PromiscLevel::AllMulti, &mut state) + .expect("can reset viona promiscuous state"); } fn start(&self) -> anyhow::Result<()> { self.run(); @@ -1353,9 +1360,15 @@ impl MigrateMulti for PciVirtioViona { let input: migrate::VionaStateV1 = offer.take()?; let mut state = self.inner.lock().unwrap(); - state.filter = FilterState::from_bits_retain(input.filter); + state.filter = + FilterState::from_bits(input.filter).ok_or_else(|| { + MigrateStateError::ImportFailed(format!( + "unrecognised flags in filter state: {:x}", + input.filter & !FilterState::all().bits() + )) + })?; state.unicast_mac_filters = input.unicast_mac_filters.into(); - state.unicast_mac_filters = input.multicast_mac_filters.into(); + state.multicast_mac_filters = input.multicast_mac_filters.into(); self.set_promisc(input.promisc, &mut state).map_err(|_| { MigrateStateError::ImportFailed(format!( "Could not move device promisc level to {:?}.", @@ -1846,7 +1859,6 @@ pub(crate) mod bits { pub const VIRTIO_NET_CFG_SIZE: usize = 6 + 2 + 2 + 2 + 4 + 1 + 1 + 2 + 4; } use bits::*; -use zerocopy::{FromBytes, Immutable, IntoBytes}; /// Check that available viona API matches expectations of propolis crate pub(crate) fn check_api_version() -> Result<(), crate::api_version::Error> { From fc86ff37b1b4ebe255b9831094e710dd1b349373 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 26 May 2026 18:19:17 +0100 Subject: [PATCH 5/8] Clippy.. --- lib/propolis/src/hw/virtio/viona.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 040267059..983736034 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -1014,7 +1014,7 @@ impl PciVirtioViona { self.virtio_state.set_needs_reset(self); } Err(()) - }, + } } } From 5cc7af54485971cdc69441e8a61615ac828dd8e6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 27 May 2026 11:09:13 +0000 Subject: [PATCH 6/8] Debugging... --- lib/propolis/src/hw/virtio/viona.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 983736034..9349c4b79 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -2333,6 +2333,7 @@ mod test { self.common_config.write_le64(common_cfg::queue_device, used_gpa); + // TODO(ky): why are we failing on this step? self.common_config.write_le16(common_cfg::queue_enable, 1); // Finally, round up so the next queue (if there is one) starts @@ -2841,17 +2842,24 @@ mod test { const TEST_VNIC: &'static str = "vnic_prop_test0"; for test in tests { let underlying_nic = underlying_nic.clone(); + eprintln!("running viona test '{}'", test.name); rt.block_on(async move { create_vnic(&underlying_nic, TEST_VNIC); - let test_ctx = - create_test_ctx(test.name, &underlying_nic, TEST_VNIC); - Lifecycle::start(test_ctx.dev.as_ref()) - .expect("can start viona device"); - let test_ctx = (test.test_fn)(test_ctx); - drop(test_ctx); - + let res = std::panic::catch_unwind(move || { + let test_ctx = + create_test_ctx(test.name, &underlying_nic, TEST_VNIC); + Lifecycle::start(test_ctx.dev.as_ref()) + .expect("can start viona device"); + let test_ctx = (test.test_fn)(test_ctx); + drop(test_ctx); + }); delete_vnic(TEST_VNIC); + if let Err(_) = res { + panic!("viona test '{}' was unsuccessful\n", test.name); + } else { + eprintln!("viona test '{}' was successful\n", test.name); + } }); } } From ed23f4bbdc5c3264fbebceb1d456d020806cae43 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 27 May 2026 18:14:22 +0100 Subject: [PATCH 7/8] Got it. --- lib/propolis/src/hw/virtio/queue.rs | 5 ++++- lib/propolis/src/hw/virtio/viona.rs | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs index 66dc73da5..321ce5207 100644 --- a/lib/propolis/src/hw/virtio/queue.rs +++ b/lib/propolis/src/hw/virtio/queue.rs @@ -328,7 +328,10 @@ impl VirtQueue { used.reset(); self.live.store(false, Ordering::Release); self.enabled.store(false, Ordering::Release); - self.is_control.store(false, Ordering::Release); + + // We don't modify `self.is_control` here. This allows devices to avoid, + // e.g., attempting to plumb a later `VqChange::Reset` down to the + // kernel in the case of viona. } pub(super) fn enable(&self) { diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 9349c4b79..84579f0b3 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -235,7 +235,7 @@ pub mod migrate { /// are calls "vrings". Since the userspace portion of the Viona emulation is /// tasked with keeping the vring state in sync with the VirtQueue it /// represents, we must track its perceived state. -#[derive(Copy, Clone, Default, Eq, PartialEq)] +#[derive(Copy, Clone, Default, Eq, PartialEq, Debug)] enum VRingState { /// Initial state of the vring as it comes out of reset /// @@ -1340,7 +1340,7 @@ impl MigrateMulti for PciVirtioViona { self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).unwrap(); } // Queue count is a NonZeroU16; hence `get` and -1 will not underflow. - let io_queues = self.virtio_state.queues.count().get() - 1; + let io_queues = self.virtio_state.queues.count().get(); let pairs = io_queues / 2; if !io_queues.is_multiple_of(2) { return Err(MigrateStateError::ImportFailed(format!( @@ -2646,6 +2646,7 @@ mod test { Lifecycle::reset(test_ctx.dev.as_ref()); let mut driver = test_ctx.create_driver(); driver.modern_device_init(expected_feats); + Lifecycle::reset(test_ctx.dev.as_ref()); let mut driver = test_ctx.create_driver(); driver.modern_device_init(expected_feats | VIRTIO_NET_F_MQ); From 36cb7579b1b416a4080ce1381a2db44b008f01e6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 27 May 2026 18:49:21 +0100 Subject: [PATCH 8/8] Ensure we unset the control queue if we re-init without CTRL_VQ --- lib/propolis/src/hw/virtio/viona.rs | 54 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 84579f0b3..2767f95e2 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -1101,38 +1101,36 @@ impl VirtioDevice for PciVirtioViona { self.hdl.set_features(feat).map_err(|_| ())?; // Any remaining setup is for control-queue based features. - if (feat & VIRTIO_NET_F_CTRL_VQ) == 0 { - return Ok(()); - } - - if self.virtio_state.queues.max_capacity() < 3 { - // Since we're advertising control queue support, we need - // one Rx, Tx, and CtlQ at the minimum. - return Err(()); - } - - let ctl_q_idx = if (feat & VIRTIO_NET_F_MQ) != 0 { - self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).map_err(|_| ())?; - probes::virtio_viona_mq_set_use_pairs!(|| ( - MqSetPairsCause::MqEnabled as u8, - PROPOLIS_MAX_MQ_PAIRS - )); - self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?; - self.virtio_state.queues.max_capacity() - 1 + let control_queue = if (feat & VIRTIO_NET_F_CTRL_VQ) == 0 { + None } else { - VIRTIO_NO_MQ_CTRL_Q_INDEX - }; + if self.virtio_state.queues.max_capacity() < 3 { + // Since we're advertising control queue support, we need + // one Rx, Tx, and CtlQ at the minimum. + return Err(()); + } - self.virtio_state.queues.set_ctl_queues(&[ctl_q_idx - .try_into() - .expect("queue index must be a valid u16")])?; + let ctl_q_idx = if (feat & VIRTIO_NET_F_MQ) != 0 { + self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).map_err(|_| ())?; + probes::virtio_viona_mq_set_use_pairs!(|| ( + MqSetPairsCause::MqEnabled as u8, + PROPOLIS_MAX_MQ_PAIRS + )); + self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?; + self.virtio_state.queues.max_capacity() - 1 + } else { + VIRTIO_NO_MQ_CTRL_Q_INDEX + }; - if (feat & VIRTIO_NET_F_CTRL_RX) != 0 { - let mut state = self.inner.lock().unwrap(); - self.set_promisc(PromiscLevel::None, &mut state)?; - } + if (feat & VIRTIO_NET_F_CTRL_RX) != 0 { + let mut state = self.inner.lock().unwrap(); + self.set_promisc(PromiscLevel::None, &mut state)?; + } - Ok(()) + Some(ctl_q_idx.try_into().expect("queue index must be a valid u16")) + }; + + self.virtio_state.queues.set_ctl_queues(control_queue.as_slice()) } fn queue_notify(&self, vq: &VirtQueue) {