diff --git a/Cargo.lock b/Cargo.lock index 9f1fde31d..9bfddfcd7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6684,6 +6684,7 @@ dependencies = [ "oximeter", "p9ds", "paste", + "pbind", "pin-project-lite", "propolis_types 0.0.0", "rand 0.9.2", diff --git a/bin/propolis-server/src/lib/vcpu_tasks.rs b/bin/propolis-server/src/lib/vcpu_tasks.rs index e21ffc027..d2b2d4dd7 100644 --- a/bin/propolis-server/src/lib/vcpu_tasks.rs +++ b/bin/propolis-server/src/lib/vcpu_tasks.rs @@ -75,7 +75,7 @@ impl VcpuTasks { let thread = std::thread::Builder::new() .name(format!("vcpu-{}", vcpu.id)) .spawn(move || { - if let Some(bind_cpu) = bind_cpu { + if bind_cpu.is_some() { pbind::bind_lwp(bind_cpu) .expect("can bind to specified CPU"); } diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 3b3eec631..199e06625 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -328,7 +328,7 @@ impl Instance { let _ = std::thread::Builder::new() .name(format!("vcpu-{}", vcpu.id)) .spawn(move || { - if let Some(bind_cpu) = bind_cpu { + if bind_cpu.is_some() { pbind::bind_lwp(bind_cpu).expect("can bind vcpu"); } Instance::vcpu_loop(inner, vcpu.as_ref(), &task, task_log) diff --git a/crates/pbind/src/lib.rs b/crates/pbind/src/lib.rs index 62f7eabe3..6fb6f81ec 100644 --- a/crates/pbind/src/lib.rs +++ b/crates/pbind/src/lib.rs @@ -63,8 +63,13 @@ pub fn online_cpus() -> Result { } #[cfg(target_os = "illumos")] -/// Bind the current LWP to the specified processor. -pub fn bind_lwp(bind_cpu: processorid_t) -> Result<(), Error> { +/// Bind this LWP to the specified processor. +/// +/// Returns the previous processor binding for this LWP, if any was set, +/// otherwise `None` +pub fn bind_lwp( + bind_cpu: Option, +) -> Result, Error> { extern "C" { fn processor_bind( idtype: idtype_t, @@ -77,12 +82,19 @@ pub fn bind_lwp(bind_cpu: processorid_t) -> Result<(), Error> { // From ``. const P_MYID: id_t = -1; + // From `common/sys/processor.h`: sentinel indicating + // > "LWP/thread is not bound" + const PBIND_NONE: processorid_t = -1; + + let newbind: processorid_t = bind_cpu.unwrap_or(PBIND_NONE); + let mut obind: processorid_t = PBIND_NONE; + let res = unsafe { processor_bind( IdType::P_LWPID as i32, P_MYID, - bind_cpu, - std::ptr::null_mut(), + newbind, + &mut obind as *mut processorid_t, ) }; @@ -90,13 +102,33 @@ pub fn bind_lwp(bind_cpu: processorid_t) -> Result<(), Error> { return Err(Error::last_os_error()); } - Ok(()) + let oldproc = if obind != PBIND_NONE { Some(obind) } else { None }; + + Ok(oldproc) } #[cfg(not(target_os = "illumos"))] /// On non-illumos targets, we're not actually running a VM. We do need the /// crate to compile to be nicer for blanket `cargo test` invocations on other /// platforms. So a no-op function will do. -pub fn bind_lwp(_bind_cpu: processorid_t) -> Result<(), Error> { - Ok(()) +pub fn bind_lwp( + _bind_cpu: Option, +) -> Result, Error> { + Ok(None) +} + +/// Run the provided function without any processor binding active on the +/// current LWP. The LWP's original processor binding is restored after the +/// function returns. +/// +/// If the function panics, the LWP's original processor binding will not be +/// restored. +pub fn with_unbound_lwp(f: impl FnOnce() -> T) -> T { + let oldbind = bind_lwp(None).expect("can unbind this LWP"); + + let res = f(); + + bind_lwp(oldbind).expect("can re-bind this LWP as it was"); + + res } diff --git a/lib/propolis/Cargo.toml b/lib/propolis/Cargo.toml index 464a293db..813a47f65 100644 --- a/lib/propolis/Cargo.toml +++ b/lib/propolis/Cargo.toml @@ -19,6 +19,7 @@ cpuid_utils.workspace = true dladm.workspace = true viona_api.workspace = true propolis_types.workspace = true +pbind.workspace = true usdt = { workspace = true, features = ["asm"] } tokio = { workspace = true, features = ["full"] } futures.workspace = true diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 9a7f9bce4..0d7fb04a6 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -1071,12 +1071,30 @@ impl VionaHdl { fn ring_init(&self, vq: &VirtQueue) -> io::Result<()> { if !vq.is_control() { let mut vna_ring_init = viona_api::vioc_ring_init_modern::from(vq); - unsafe { + + // Gross: `VNA_IOC_RING_INIT*` will have viona go and create an LWP + // in our process for the vring worker. It will inherit the current + // LWP's processor binding. ring_init is typically called from a + // vCPU LWP in service of an MMIO to activate the ring. These facts + // collaborate to get the worker LWP bound to the same host CPU as + // happens to be running the vCPU that set the NIC running. + // + // Because the guest probably goes and enables all the rings on one + // CPU as part of some driver operation, if we don't intervene here + // it's likely all the worker threads for the vNIC will be bound to + // the same core. We don't actually know, from the device side, if + // the guest will go and set up the rest of the rings right now, so + // we have to do the unbind/bind dance for each ring. + // + // Arguably one might not want to do such operations directly on a + // vCPU thread. Device setup isn't exactly on anyone's hot path so + // we'll live. + pbind::with_unbound_lwp(|| unsafe { self.0.ioctl( viona_api::VNA_IOC_RING_INIT_MODERN, &mut vna_ring_init, - )?; - } + ) + })?; } Ok(()) }