diff --git a/src/linux/mem.rs b/src/linux/mem.rs index 08f2722..d220690 100644 --- a/src/linux/mem.rs +++ b/src/linux/mem.rs @@ -181,11 +181,18 @@ impl ProcessVirtualMemory { let mut remaining_written = if libcret == -1 { 0 } else { libcret as usize }; - for (liof, (_, meta)) in iov_local - .iter() - .take(cnt) - .zip(iov_remote.iter().zip(self.temp_meta.iter())) - { + // The syscall above operated on the window [win, win + cnt), + // so result dispatch and byte accounting must start at `win` + // too. `offset` is advanced inside the loop, so snapshot it + // before iterating. + let win = offset; + + for (liof, (_, meta)) in iov_local.iter().skip(win).take(cnt).zip( + iov_remote + .iter() + .skip(win) + .zip(self.temp_meta.iter().skip(win)), + ) { offset += 1; let to_write = remaining_written; @@ -250,3 +257,87 @@ impl MemoryView for ProcessVirtualMemory { } } } + +#[cfg(test)] +mod tests { + use super::*; + use memflow::cglue::CTup2; + + fn vmem_for_pid(pid: pid_t) -> ProcessVirtualMemory { + const IOV_MAX: usize = 1024; + ProcessVirtualMemory { + pid, + temp_iov: vec![ + IoSendVec(iovec { + iov_base: std::ptr::null_mut::<c_void>(), + iov_len: 0, + }); + IOV_MAX * 2 + ] + .into_boxed_slice(), + temp_meta: vec![Address::INVALID; IOV_MAX].into_boxed_slice(), + } + } + + // Regression test for the partial-transfer retry window in `process_rw`. + // + // A batched read of [valid, unmapped, valid] forces `process_vm_readv` to transfer + // the first region, fault on the middle one, and require a retry for the third. + // Before the `.skip(win)` fix the retry dispatched from index 0, re-reporting the + // first region and silently dropping the third. Reading from our own PID lets us + // exercise this without spawning a child.
+ #[test] + fn partial_read_across_hole_reports_each_region_once() { + let src_a = [0xAAu8; 8]; + let src_c = [0xCCu8; 8]; + + let addr_a = Address::from(src_a.as_ptr() as u64); + let addr_c = Address::from(src_c.as_ptr() as u64); + // Below the default mmap_min_addr, so reliably unmapped (EFAULT on read). + let addr_bad = Address::from(0x1000u64); + + let mut dst_a = [0u8; 8]; + let mut dst_b = [0u8; 8]; + let mut dst_c = [0u8; 8]; + + let mut ok: Vec<(Address, Vec<u8>)> = Vec::new(); + let mut fail: Vec<Address>
= Vec::new(); + + { + let inp = vec![ + CTup2(addr_a, (&mut dst_a[..]).into()), + CTup2(addr_bad, (&mut dst_b[..]).into()), + CTup2(addr_c, (&mut dst_c[..]).into()), + ]; + + let mut ok_cb = |CTup2(a, d): ReadData| { + ok.push((a, d.to_vec())); + true + }; + let mut fail_cb = |CTup2(a, _): ReadData| { + fail.push(a); + true + }; + let mut ok_oc: ReadCallback = (&mut ok_cb).into(); + let mut fail_oc: ReadCallback = (&mut fail_cb).into(); + + let mut mem = vmem_for_pid(unsafe { libc::getpid() }); + mem.read_iter(inp.into_iter(), Some(&mut ok_oc), Some(&mut fail_oc)) + .unwrap(); + } + + ok.sort_by_key(|(a, _)| a.to_umem()); + let mut expected = vec![(addr_a, vec![0xAAu8; 8]), (addr_c, vec![0xCCu8; 8])]; + expected.sort_by_key(|(a, _)| a.to_umem()); + + assert_eq!( + ok, expected, + "each readable region must be reported exactly once with correct data" + ); + assert_eq!( + fail, + vec![addr_bad], + "the unmapped region must be the only failure" + ); + } +} diff --git a/src/linux/mod.rs b/src/linux/mod.rs index 8105b51..c4c58ca 100644 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -8,12 +8,61 @@ use procfs::KernelModule; use itertools::Itertools; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; + pub mod mem; use mem::ProcessVirtualMemory; pub mod process; +use process::process_state; pub use process::LinuxProcess; +/// Architecture of the host the backend is running on. +/// +/// memflow-native works through native syscalls, so the inspected processes always run +/// under the same kernel/ISA as this build. We therefore report the compile target's +/// architecture rather than assuming x86-64. 32-bit processes running under a 64-bit +/// kernel are still reported as 64-bit here; distinguishing them would require sniffing +/// the ELF class of `/proc/<pid>/exe`.
+fn host_arch() -> ArchitectureIdent { + #[cfg(target_arch = "x86_64")] + { + ArchitectureIdent::X86(64, false) + } + #[cfg(target_arch = "x86")] + { + ArchitectureIdent::X86(32, false) + } + #[cfg(target_arch = "aarch64")] + { + // Page size is read at runtime; only 4k is currently supported by memflow. + let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; + ArchitectureIdent::AArch64(if page_size > 0 { + page_size as usize + } else { + 0x1000 + }) + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))] + { + ArchitectureIdent::Unknown(0) + } +} + +/// Stable, ordering-independent handle for a kernel module, derived from its name. +/// `procfs::KernelModule` exposes no kernel address we could reuse, so hashing the name +/// keeps the handle valid across module load/unload churn (unlike a list index). +/// +/// `DefaultHasher` is fixed-seeded (unlike the randomized `RandomState` behind +/// `HashMap`), so the handle is consistent across the two lookups within a process run, +/// which is all this handle needs. 
+fn module_handle(name: &str) -> Address { + let mut hasher = DefaultHasher::new(); + name.hash(&mut hasher); + Address::from(hasher.finish()) +} + pub struct LinuxOs { info: OsInfo, } @@ -46,7 +95,7 @@ impl Default for LinuxOs { let info = OsInfo { base: Address::NULL, size: 0, - arch: ArchitectureIdent::X86(64, false), + arch: host_arch(), }; Self { info } @@ -108,15 +157,17 @@ impl Os for LinuxOs { let path = path.into(); + let arch = host_arch(); + Ok(ProcessInfo { address: (proc.pid() as umem).into(), pid, command_line, path, name, - sys_arch: ArchitectureIdent::X86(64, false), - proc_arch: ArchitectureIdent::X86(64, false), - state: ProcessState::Alive, + sys_arch: arch, + proc_arch: arch, + state: process_state(pid as pid_t), // dtb is not known/used here dtb1: Address::invalid(), dtb2: Address::invalid(), @@ -145,8 +196,9 @@ impl Os for LinuxOs { fn module_address_list_callback(&mut self, mut callback: AddressCallback) -> Result<()> { let modules = self.kernel_modules_sorted()?; - (0..modules.len()) - .map(Address::from) + modules + .iter() + .map(|km| module_handle(&km.name)) .take_while(|a| callback.call(*a)) .for_each(|_| {}); @@ -161,7 +213,8 @@ impl Os for LinuxOs { let modules = self.kernel_modules_sorted()?; modules - .get(address.to_umem() as usize) + .iter() + .find(|km| module_handle(&km.name) == address) .map(|km| ModuleInfo { address, size: km.size as umem, diff --git a/src/linux/process.rs b/src/linux/process.rs index 0df66e0..5422109 100644 --- a/src/linux/process.rs +++ b/src/linux/process.rs @@ -115,23 +115,73 @@ impl LinuxProcess { let data = std::fs::read(path) .map_err(|_| Error(ErrorOrigin::OsLayer, ErrorKind::EnvarNotFound))?; + // /proc/<pid>/environ is exactly the memory range env_start..env_end, + // so in-process addresses can be derived from byte offsets within it.
+ let env_start = self + .proc_handle() + .ok() + .and_then(|p| p.stat().ok()) + .and_then(|stat| stat.env_start); + let mut out = Vec::new(); - for entry in data.split(|b| *b == 0).filter(|entry| !entry.is_empty()) { - let entry = String::from_utf8_lossy(entry); - if let Some((name, value)) = entry.split_once('=') { - out.push(EnvVarInfo { - name: ReprCString::from(name), - value: ReprCString::from(value), - address: Address::NULL, - arch: self.info.proc_arch, - }); + let mut offset = 0u64; + for entry in data.split(|b| *b == 0) { + let entry_len = entry.len() as u64; + if !entry.is_empty() { + let entry = String::from_utf8_lossy(entry); + if let Some((name, value)) = entry.split_once('=') { + out.push(EnvVarInfo { + name: ReprCString::from(name), + value: ReprCString::from(value), + address: env_start + .map(|start| Address::from(start + offset)) + .unwrap_or(Address::NULL), + arch: self.info.proc_arch, + }); + } } + offset += entry_len + 1; } Ok(out) } } +/// Decodes a waitpid(2)-style status word (as exposed in `/proc/<pid>/stat` field 52) +/// into a memflow [`ExitCode`]. +/// +/// A normal exit reports its `exit(3)` code; a process killed by a signal reports the +/// negated signal number so the two cases stay distinguishable. +fn decode_exit_code(status: i32) -> i32 { + if status & 0x7f == 0 { + // WIFEXITED: low 7 bits clear -> exit code is in bits 8..16. + (status >> 8) & 0xff + } else { + // WIFSIGNALED: low 7 bits carry the terminating signal. + -(status & 0x7f) + } +} + +/// Resolves the [`ProcessState`] of `pid` from `/proc/<pid>/stat`. +/// +/// Zombie/dead states map to [`ProcessState::Dead`] (carrying the decoded exit code +/// when the kernel exposes it); a vanished PID is treated as reaped (`Dead(0)`), and a +/// stat that fails for any other reason (e.g. permissions) stays [`ProcessState::Unknown`].
+pub(crate) fn process_state(pid: pid_t) -> ProcessState { + match procfs::process::Process::new(pid).and_then(|p| p.stat()) { + Ok(stat) => match stat.state { + // Z = zombie (exited, unreaped), X/x = dead + 'Z' | 'X' | 'x' => { + ProcessState::Dead(stat.exit_code.map(decode_exit_code).unwrap_or(0)) + } + _ => ProcessState::Alive, + }, + // /proc/<pid> vanished -> the process was reaped; exit code is no longer recoverable + Err(procfs::ProcError::NotFound(_)) => ProcessState::Dead(0), + Err(_) => ProcessState::Unknown, + } +} + cglue_impl_group!(LinuxProcess, ProcessInstance, {}); cglue_impl_group!(LinuxProcess, IntoProcessInstance, {}); @@ -152,12 +202,13 @@ impl Process for LinuxProcess { module_maps .iter() - .enumerate() .filter(|_| target_arch.is_none() || Some(&self.info().sys_arch) == target_arch) - .take_while(|(i, _)| { + .take_while(|map| { callback.call(ModuleAddressInfo { - address: Address::from(*i as u64), - arch: self.info.proc_arch, + address: Address::from(map.address.0), + // Match `module_by_address`, which keys on `sys_arch`; the maps are + // also filtered above by `sys_arch`. + arch: self.info.sys_arch, }) }) .for_each(|_| {}); @@ -182,7 +233,8 @@ impl Process for LinuxProcess { let module_maps = self.module_maps()?; module_maps - .get(address.to_umem() as usize) + .iter() + .find(|map| Address::from(map.address.0) == address) .map(|map| ModuleInfo { address, parent_process: self.info.address, @@ -223,8 +275,23 @@ impl Process for LinuxProcess { /// /// This will generally be for the initial executable that was run fn primary_module_address(&mut self) -> Result<Address>
{ - // TODO: Is it always 0th mod? - Ok(Address::from(0)) + let exe = self.proc_handle()?.exe().ok(); + let module_maps = self.module_maps()?; + + if let Some(exe) = exe { + if let Some(map) = module_maps + .iter() + .find(|m| matches!(&m.pathname, MMapPath::Path(p) if *p == exe)) + { + return Ok(Address::from(map.address.0)); + } + } + + // exe link unreadable or unmatched (e.g. deleted binary) - fall back to first mapping + module_maps + .first() + .map(|m| Address::from(m.address.0)) + .ok_or(Error(ErrorOrigin::OsLayer, ErrorKind::NotFound)) } /// Retrieves the process info @@ -234,7 +301,7 @@ impl Process for LinuxProcess { /// Retrieves the state of the process fn state(&mut self) -> ProcessState { - ProcessState::Unknown + process_state(self.pid) } /// Changes the dtb this process uses for memory translations. @@ -321,8 +388,13 @@ impl Process for LinuxProcess { #[cfg(memflow_plugin_api = "2")] fn environment_block_address(&mut self, _architecture: ArchitectureIdent) -> Result<Address>
{ - // Linux does not expose a stable public env-block pointer through procfs. - Ok(Address::NULL) + // env_start is only exposed on kernel >= 3.5 and may be hidden by permission checks. + self.proc_handle()? + .stat() + .map_err(|_| Error(ErrorOrigin::OsLayer, ErrorKind::UnableToReadFile))? + .env_start + .map(Address::from) + .ok_or(Error(ErrorOrigin::OsLayer, ErrorKind::NotSupported)) } #[cfg(memflow_plugin_api = "2")] @@ -349,3 +421,25 @@ impl MemoryView for LinuxProcess { self.virt_mem.metadata() } } + +#[cfg(test)] +mod tests { + use super::decode_exit_code; + + #[test] + fn normal_exit_decodes_to_exit_code() { + // exit(0) and exit(3) as reported by waitpid: code in bits 8..16. + assert_eq!(decode_exit_code(0x0000), 0); + assert_eq!(decode_exit_code(3 << 8), 3); + assert_eq!(decode_exit_code(255 << 8), 255); + } + + #[test] + fn signalled_exit_decodes_to_negative_signal() { + // Killed by SIGKILL (9) / SIGSEGV (11): low 7 bits carry the signal. + assert_eq!(decode_exit_code(9), -9); + assert_eq!(decode_exit_code(11), -11); + // Core-dump flag (0x80) must not bleed into the signal number. + assert_eq!(decode_exit_code(11 | 0x80), -11); + } +}