From 6711abc8b59bf18b2a28df4865286e6de2e9a5aa Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Mon, 18 May 2026 22:51:01 -0500 Subject: [PATCH] fix(fetchkit): harden local file saver symlink handling --- Cargo.lock | 1 + Cargo.toml | 3 + crates/fetchkit/Cargo.toml | 1 + crates/fetchkit/src/file_saver.rs | 261 +++++++++++++++++++-- crates/fetchkit/tests/file_saver_safety.rs | 25 ++ specs/initial.md | 2 +- specs/threat-model.md | 10 +- 7 files changed, 283 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa9f1c1..401ff38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -584,6 +584,7 @@ dependencies = [ "bytes", "ed25519-dalek", "futures", + "libc", "rand 0.10.1", "reqwest", "schemars", diff --git a/Cargo.toml b/Cargo.toml index 4b1fa02..eacdf71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,9 @@ async-trait = "0.1" # Service trait tower = { version = "0.5", features = ["util"] } +# Platform filesystem flags +libc = "0.2" + # Crypto (bot-auth feature) ed25519-dalek = { version = "2", features = ["rand_core"] } base64 = "0.22" diff --git a/crates/fetchkit/Cargo.toml b/crates/fetchkit/Cargo.toml index 9231802..9105716 100644 --- a/crates/fetchkit/Cargo.toml +++ b/crates/fetchkit/Cargo.toml @@ -28,6 +28,7 @@ futures = { workspace = true } bytes = { workspace = true } async-trait = { workspace = true } tower = { workspace = true } +libc = { workspace = true } # Optional: bot-auth feature ed25519-dalek = { workspace = true, optional = true } diff --git a/crates/fetchkit/src/file_saver.rs b/crates/fetchkit/src/file_saver.rs index 6ebc71f..c0c3c37 100644 --- a/crates/fetchkit/src/file_saver.rs +++ b/crates/fetchkit/src/file_saver.rs @@ -6,6 +6,14 @@ //! - Tests: in-memory buffer use async_trait::async_trait; +#[cfg(unix)] +use std::ffi::{CString, OsStr}; +#[cfg(unix)] +use std::io::Write; +#[cfg(unix)] +use std::os::fd::{AsRawFd, FromRawFd, OwnedFd}; +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; use std::path::{Component, Path, PathBuf}; use thiserror::Error; @@ -124,6 +132,7 @@ impl LocalFileSaver { } } + #[cfg(not(unix))] async fn canonicalize_base_dir(&self, base: &Path) -> Result { tokio::fs::create_dir_all(base).await?; @@ -142,6 +151,7 @@ impl LocalFileSaver { Ok(tokio::fs::canonicalize(base).await?) } + #[cfg(not(unix))] async fn prepare_parent_dir(&self, resolved: &Path) -> Result { let Some(base) = &self.base_dir else { return Ok(resolved @@ -208,19 +218,38 @@ impl LocalFileSaver { Ok(current) } -} -#[async_trait] -impl FileSaver for LocalFileSaver { - async fn save(&self, path: &str, bytes: &[u8]) -> Result { - let resolved = self.resolve_path(path)?; - if let Some(base_dir) = &self.base_dir { - if resolved == normalize_path(base_dir) { - return Err(FileSaveError::PathNotAllowed( - "Path must name a file".into(), - )); - } - } + #[cfg(unix)] + async fn write_resolved_path( + &self, + resolved: PathBuf, + bytes: &[u8], + ) -> Result { + let bytes = bytes.to_vec(); + let task = if let Some(base_dir) = &self.base_dir { + let normalized_base = normalize_path(base_dir); + let relative = resolved + .strip_prefix(&normalized_base) + .map_err(|_| FileSaveError::PathNotAllowed("Path escapes base directory".into()))? + .to_path_buf(); + let base_dir = base_dir.clone(); + tokio::task::spawn_blocking(move || { + save_under_base_no_follow(&base_dir, &relative, &bytes) + }) + } else { + tokio::task::spawn_blocking(move || save_absolute_no_follow(&resolved, &bytes)) + }; + + task.await + .map_err(|err| FileSaveError::Other(format!("File save task failed: {err}")))? + } + + #[cfg(not(unix))] + async fn write_resolved_path( + &self, + resolved: PathBuf, + bytes: &[u8], + ) -> Result { let file_name = resolved .file_name() .ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))?; @@ -245,9 +274,27 @@ impl FileSaver for LocalFileSaver { Err(err) => return Err(err.into()), } - // THREAT[TM-INPUT-008]: Validate and create the final path during save, - // so symlink checks happen at write time rather than in a separate preflight step. tokio::fs::write(&final_path, bytes).await?; + Ok(final_path) + } +} + +#[async_trait] +impl FileSaver for LocalFileSaver { + async fn save(&self, path: &str, bytes: &[u8]) -> Result { + let resolved = self.resolve_path(path)?; + if let Some(base_dir) = &self.base_dir { + if resolved == normalize_path(base_dir) { + return Err(FileSaveError::PathNotAllowed( + "Path must name a file".into(), + )); + } + } + resolved + .file_name() + .ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))?; + + let final_path = self.write_resolved_path(resolved, bytes).await?; Ok(SaveResult { path: final_path.to_string_lossy().to_string(), @@ -261,6 +308,192 @@ impl FileSaver for LocalFileSaver { } } +#[cfg(unix)] +fn save_under_base_no_follow( + base: &Path, + relative: &Path, + bytes: &[u8], +) -> Result { + // Keep traversal and final creation anchored to directory descriptors so + // attacker-controlled symlink swaps cannot redirect a later path open. + std::fs::create_dir_all(base)?; + + let meta = std::fs::symlink_metadata(base)?; + if meta.file_type().is_symlink() { + return Err(FileSaveError::PathNotAllowed( + "Base directory must not be a symlink".into(), + )); + } + if !meta.is_dir() { + return Err(FileSaveError::PathNotAllowed( + "Base directory must be a directory".into(), + )); + } + + let canonical_base = std::fs::canonicalize(base)?; + let mut current_dir = open_dir_no_follow(&canonical_base)?; + + for component in relative + .parent() + .unwrap_or_else(|| Path::new("")) + .components() + { + let Component::Normal(name) = component else { + return Err(FileSaveError::PathNotAllowed(format!( + "Unsupported path component in save path: {}", + relative.display() + ))); + }; + + mkdirat_if_missing(¤t_dir, name)?; + current_dir = open_child_dir_no_follow(¤t_dir, name)?; + } + + let file_name = relative + .file_name() + .ok_or_else(|| FileSaveError::PathNotAllowed("Path must name a file".into()))?; + let final_path = canonical_base.join(relative); + let mut file = open_child_file_no_follow(¤t_dir, file_name, &final_path)?; + file.write_all(bytes)?; + + Ok(final_path) +} + +#[cfg(unix)] +fn save_absolute_no_follow(path: &Path, bytes: &[u8]) -> Result { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let mut file = open_path_no_follow(path)?; + file.write_all(bytes)?; + Ok(path.to_path_buf()) +} + +#[cfg(unix)] +fn open_dir_no_follow(path: &Path) -> Result { + let path = cstring_path(path)?; + let fd = unsafe { + libc::open( + path.as_ptr(), + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_CLOEXEC | libc::O_NOFOLLOW, + ) + }; + owned_fd_from_result(fd, "Refusing to traverse symlink") +} + +#[cfg(unix)] +fn open_child_dir_no_follow(parent: &OwnedFd, name: &OsStr) -> Result { + let name = cstring_component(name)?; + let fd = unsafe { + libc::openat( + parent.as_raw_fd(), + name.as_ptr(), + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_CLOEXEC | libc::O_NOFOLLOW, + ) + }; + owned_fd_from_result(fd, "Refusing to traverse symlink") +} + +#[cfg(unix)] +fn open_child_file_no_follow( + parent: &OwnedFd, + name: &OsStr, + path_for_error: &Path, +) -> Result { + let name = cstring_component(name)?; + let fd = unsafe { + libc::openat( + parent.as_raw_fd(), + name.as_ptr(), + libc::O_WRONLY | libc::O_CREAT | libc::O_TRUNC | libc::O_CLOEXEC | libc::O_NOFOLLOW, + 0o666, + ) + }; + file_from_result( + fd, + format!( + "Refusing to write through symlink: {}", + path_for_error.display() + ), + ) +} + +#[cfg(unix)] +fn open_path_no_follow(path: &Path) -> Result { + let c_path = cstring_path(path)?; + let fd = unsafe { + libc::open( + c_path.as_ptr(), + libc::O_WRONLY | libc::O_CREAT | libc::O_TRUNC | libc::O_CLOEXEC | libc::O_NOFOLLOW, + 0o666, + ) + }; + file_from_result( + fd, + format!("Refusing to write through symlink: {}", path.display()), + ) +} + +#[cfg(unix)] +fn mkdirat_if_missing(parent: &OwnedFd, name: &OsStr) -> Result<(), FileSaveError> { + let name = cstring_component(name)?; + let result = unsafe { libc::mkdirat(parent.as_raw_fd(), name.as_ptr(), 0o755) }; + if result == 0 { + return Ok(()); + } + + let err = std::io::Error::last_os_error(); + if err.kind() == std::io::ErrorKind::AlreadyExists { + Ok(()) + } else { + Err(err.into()) + } +} + +#[cfg(unix)] +fn owned_fd_from_result(fd: libc::c_int, symlink_message: &str) -> Result { + if fd >= 0 { + Ok(unsafe { OwnedFd::from_raw_fd(fd) }) + } else { + Err(open_error(symlink_message.to_string())) + } +} + +#[cfg(unix)] +fn file_from_result( + fd: libc::c_int, + symlink_message: String, +) -> Result { + if fd >= 0 { + Ok(unsafe { std::fs::File::from_raw_fd(fd) }) + } else { + Err(open_error(symlink_message)) + } +} + +#[cfg(unix)] +fn open_error(symlink_message: String) -> FileSaveError { + let err = std::io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::ELOOP) { + FileSaveError::PathNotAllowed(symlink_message) + } else { + FileSaveError::Io(err) + } +} + +#[cfg(unix)] +fn cstring_path(path: &Path) -> Result { + CString::new(path.as_os_str().as_bytes()) + .map_err(|_| FileSaveError::PathNotAllowed("Path contains NUL byte".into())) +} + +#[cfg(unix)] +fn cstring_component(component: &OsStr) -> Result { + CString::new(component.as_bytes()) + .map_err(|_| FileSaveError::PathNotAllowed("Path contains NUL byte".into())) +} + /// Lexically normalize a path by resolving `.` and `..` components. fn normalize_path(path: &Path) -> PathBuf { let mut components = Vec::new(); diff --git a/crates/fetchkit/tests/file_saver_safety.rs b/crates/fetchkit/tests/file_saver_safety.rs index 1d6817c..e7eec57 100644 --- a/crates/fetchkit/tests/file_saver_safety.rs +++ b/crates/fetchkit/tests/file_saver_safety.rs @@ -28,11 +28,21 @@ fn symlink_dir(src: &std::path::Path, dst: &std::path::Path) { std::os::unix::fs::symlink(src, dst).unwrap(); } +#[cfg(unix)] +fn symlink_file(src: &std::path::Path, dst: &std::path::Path) { + std::os::unix::fs::symlink(src, dst).unwrap(); +} + #[cfg(windows)] fn symlink_dir(src: &std::path::Path, dst: &std::path::Path) { std::os::windows::fs::symlink_dir(src, dst).unwrap(); } +#[cfg(windows)] +fn symlink_file(src: &std::path::Path, dst: &std::path::Path) { + std::os::windows::fs::symlink_file(src, dst).unwrap(); +} + // --------------------------------------------------------------------------- // Path traversal attacks // --------------------------------------------------------------------------- @@ -142,6 +152,21 @@ async fn test_path_traversal_symlink_escape_rejected() { assert!(!outside.path().join("escape.txt").exists()); } +#[tokio::test] +async fn test_path_traversal_final_symlink_escape_rejected() { + use fetchkit::file_saver::FileSaver; + + let base = tempfile::tempdir().unwrap(); + let outside = tempfile::NamedTempFile::new().unwrap(); + let saver = saver_in(base.path()); + + symlink_file(outside.path(), &base.path().join("out")); + + let result = saver.save("out", b"pwned").await; + assert!(result.is_err(), "final symlink escape should be rejected"); + assert_eq!(std::fs::read(outside.path()).unwrap(), b""); +} + #[tokio::test] async fn test_execute_with_saver_rejects_symlink_escape() { let base = tempfile::tempdir().unwrap(); diff --git a/specs/initial.md b/specs/initial.md index 6f6547d..f24dc55 100644 --- a/specs/initial.md +++ b/specs/initial.md @@ -258,7 +258,7 @@ When `save_to_file` is set on the request: - The `FileSaver` trait provides path validation (traversal prevention) and async save. - `LocalFileSaver` is the built-in implementation for CLI/local use: - Resolves paths relative to a configurable base directory. - - Rejects path traversal via lexical normalization (note: symlinks not resolved). + - Rejects path traversal via lexical normalization and save-time symlink checks. - Creates parent directories as needed. #### Size diff --git a/specs/threat-model.md b/specs/threat-model.md index 0902f83..b9526f8 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -251,11 +251,11 @@ matching). `http://internal.example.com` correctly does NOT match **TM-INPUT-008 — Symlink-based path traversal (MITIGATED):** `LocalFileSaver` still performs lexical normalization to block `..` traversal, but -save-time enforcement now walks each parent directory component under `base_dir`, -rejects symlinks, canonicalizes each directory after creation/use, and verifies -the canonical path stays under the canonical base directory. `execute_with_saver()` -no longer performs a separate `validate_path()` preflight, so path checks now -happen at write time instead of in a validate-then-write split. +save-time enforcement now walks each parent directory component under `base_dir` +using directory handles, rejects symlinks with no-follow opens, and opens the +final file relative to the verified parent with no-follow semantics. +`execute_with_saver()` no longer performs a separate `validate_path()` preflight, +so path checks now happen at write time instead of in a validate-then-write split. **TM-INPUT-009 — No base_dir allows arbitrary writes (ACCEPTED):** `LocalFileSaver::new(None)` only requires absolute paths, with no directory restriction.