Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ repos:
types-python-dateutil,
pydantic,
fastapi,
pytest,
"openai-agents[litellm]==0.14.6",
]
args: [--install-types, --non-interactive]
Expand Down Expand Up @@ -46,7 +47,7 @@ repos:

# Additional Python code quality checks
- repo: https://github.com/asottile/pyupgrade
rev: v3.20.0
rev: v3.21.2
hooks:
- id: pyupgrade
args: [--py312-plus]
Expand Down
4 changes: 4 additions & 0 deletions docs/advanced/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ When remote vars are set, Strix dual-writes telemetry to both local JSONL and th
Runtime backend for the sandbox environment.
</ParamField>

<ParamField path="STRIX_MAX_LOCAL_COPY_MB" default="1024" type="integer">
Maximum size (in MB) of a local directory target that Strix will copy into the sandbox file-by-file. For larger targets, Strix exits early with a suggestion to use `--mount` instead. Set to `0` (or a negative value) to disable the check.
</ParamField>

## Sandbox Configuration

<ParamField path="STRIX_SANDBOX_EXECUTION_TIMEOUT" default="120" type="integer">
Expand Down
17 changes: 17 additions & 0 deletions docs/usage/cli.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@ strix --target <target> [options]
Target to test. Accepts URLs, repositories, local directories, domains, or IP addresses. Can be specified multiple times.
</ParamField>

<ParamField path="--mount" type="string">
Bind-mount a local directory into the sandbox (read-only) instead of copying it in file-by-file. Use this for large repositories that are too big to stream into the container. Can be specified multiple times.

Strix copies local `--target` directories into the sandbox one file at a time, which stalls on very large trees. When a local target exceeds the copy limit (see `STRIX_MAX_LOCAL_COPY_MB`, default 1024 MB), Strix exits early and asks you to re-run with `--mount`.

<Note>
The mount is read-only to protect your source from accidental modification. This is not a hard security boundary: a root process inside the container can remount it writable, so treat `--mount` as "scan my own code", not as isolation from untrusted code.
</Note>

<Note>
The size pre-flight only covers local directory targets. Remote repositories (cloned at scan time) are not size-checked.
</Note>
</ParamField>

<ParamField path="--instruction" type="string">
Custom instructions for the scan. Use for credentials, focus areas, or specific testing approaches.
</ParamField>
Expand Down Expand Up @@ -63,6 +77,9 @@ strix -n --target ./ --scan-mode quick --scope-mode diff --diff-base origin/main

# Multi-target white-box testing
strix -t https://github.com/org/app -t https://staging.example.com

# Large local repository — bind-mount instead of copying it in
strix --mount ./huge-monorepo
```

## Exit Codes
Expand Down
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,13 @@ dev = [
"bandit>=1.8.3",
"pre-commit>=4.2.0",
"pyinstaller>=6.17.0; python_version >= '3.12' and python_version < '3.15'",
"pytest>=8.3",
"pytest-asyncio>=0.24",
]

[tool.pytest.ini_options]
asyncio_mode = "auto"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
Expand Down Expand Up @@ -104,6 +109,10 @@ module = [
ignore_missing_imports = true
disable_error_code = ["import-untyped"]

[[tool.mypy.overrides]]
module = ["tests.*"]
disallow_untyped_decorators = false

# ============================================================================
# Ruff Configuration (Fast Python Linter & Formatter)
# ============================================================================
Expand Down
5 changes: 5 additions & 0 deletions strix/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class RuntimeSettings(BaseSettings):
alias="STRIX_IMAGE",
)
backend: str = Field(default="docker", alias="STRIX_RUNTIME_BACKEND")
# Hard cap on a local target's size before we refuse to stream it into the
# sandbox file-by-file (the SDK copies every file individually, which stalls
# on large repos). Above this, the user must bind-mount via ``--mount``.
# Set to 0 (or less) to disable the pre-flight check entirely.
max_local_copy_mb: int = Field(default=1024, alias="STRIX_MAX_LOCAL_COPY_MB")


class TelemetrySettings(BaseSettings):
Expand Down
3 changes: 2 additions & 1 deletion strix/core/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def build_root_task(scan_config: dict[str, Any]) -> str:
)
elif ttype == "local_code":
path = details.get("target_path", "unknown")
sections["Local Codebases"].append(f"- {path} (available at: {workspace_path})")
suffix = ", read-only mount" if details.get("mount") else ""
sections["Local Codebases"].append(f"- {path} (available at: {workspace_path}{suffix})")
elif ttype == "web_application":
sections["URLs"].append(f"- {details.get('target_url', '')}")
elif ttype == "ip_address":
Expand Down
2 changes: 1 addition & 1 deletion strix/core/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ async def run_strix_scan(
scan_config: dict[str, Any],
scan_id: str | None = None,
image: str,
local_sources: list[dict[str, str]] | None = None,
local_sources: list[dict[str, Any]] | None = None,
coordinator: AgentCoordinator | None = None,
interactive: bool = False,
max_turns: int = DEFAULT_MAX_TURNS,
Expand Down
46 changes: 41 additions & 5 deletions strix/interface/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@
from strix.interface.utils import (
assign_workspace_subdirs,
build_final_stats_text,
build_mount_targets_info,
check_docker_connection,
clone_repository,
collect_local_sources,
dedupe_local_targets,
find_oversized_local_targets,
generate_run_name,
image_exists,
infer_target_type,
Expand Down Expand Up @@ -317,6 +320,9 @@ def parse_arguments() -> argparse.Namespace:
# Local code analysis
strix --target ./my-project

# Large local repository (bind-mounted read-only instead of copied)
strix --mount ./huge-monorepo

# Domain penetration test
strix --target example.com

Expand Down Expand Up @@ -352,6 +358,15 @@ def parse_arguments() -> argparse.Namespace:
"Can be specified multiple times for multi-target scans. "
"Required for fresh runs; loaded from disk when ``--resume`` is set.",
)
parser.add_argument(
"--mount",
type=str,
action="append",
metavar="PATH",
help="Bind-mount a local directory into the sandbox (read-only) instead of "
"copying it file-by-file. Use this for large repositories that are too big to "
"stream into the container. Can be specified multiple times.",
)
parser.add_argument(
"--instruction",
type=str,
Expand Down Expand Up @@ -455,9 +470,9 @@ def parse_arguments() -> argparse.Namespace:
args.user_explicit_instruction = args.instruction if args.resume else None

if args.resume:
if args.target:
if args.target or args.mount:
parser.error(
"Cannot combine --resume with --target. --resume picks up where "
"Cannot combine --resume with --target/--mount. --resume picks up where "
"the prior run left off, including the original target list."
)
_load_resume_state(args, parser)
Expand All @@ -470,13 +485,13 @@ def parse_arguments() -> argparse.Namespace:
f"or remove --resume to start over with the same targets."
)
else:
if not args.target:
if not args.target and not args.mount:
parser.error(
"the following arguments are required: -t/--target "
"the following arguments are required: -t/--target or --mount "
"(or use --resume <run_name> to continue a prior scan)"
)
args.targets_info = []
for target in args.target:
for target in args.target or []:
try:
target_type, target_dict = infer_target_type(target)

Expand All @@ -491,9 +506,30 @@ def parse_arguments() -> argparse.Namespace:
except ValueError:
parser.error(f"Invalid target '{target}'")

try:
args.targets_info.extend(build_mount_targets_info(args.mount or []))
except ValueError as e:
parser.error(str(e))

args.targets_info = dedupe_local_targets(args.targets_info)

assign_workspace_subdirs(args.targets_info)
rewrite_localhost_targets(args.targets_info, HOST_GATEWAY_HOSTNAME)

max_local_copy_mb = load_settings().runtime.max_local_copy_mb
max_copy_bytes = max_local_copy_mb * 1024 * 1024
oversized = find_oversized_local_targets(args.targets_info, max_copy_bytes)
if oversized:
details = "; ".join(
f"{path} ({size / (1024 * 1024):.0f} MB)" for path, size in oversized
)
parser.error(
f"Local target too large to stream into the sandbox: {details}. "
f"The limit is {max_local_copy_mb} MB "
"(set STRIX_MAX_LOCAL_COPY_MB to change it). Re-run with "
"--mount <path> to bind-mount the directory instead of copying it."
)

return args


Expand Down
123 changes: 121 additions & 2 deletions strix/interface/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import ipaddress
import json
import logging
import os
import re
import secrets
Expand All @@ -23,6 +24,9 @@
from strix.config import load_settings


logger = logging.getLogger(__name__)


def get_severity_color(severity: str) -> str:
severity_colors = {
"critical": "#dc2626",
Expand Down Expand Up @@ -1185,8 +1189,8 @@ def is_whitebox_scan(targets_info: list[dict[str, Any]]) -> bool:
return any(t.get("type") == "local_code" for t in targets_info or [])


def collect_local_sources(targets_info: list[dict[str, Any]]) -> list[dict[str, str]]:
local_sources: list[dict[str, str]] = []
def collect_local_sources(targets_info: list[dict[str, Any]]) -> list[dict[str, Any]]:
local_sources: list[dict[str, Any]] = []

for target_info in targets_info:
details = target_info["details"]
Expand All @@ -1197,6 +1201,7 @@ def collect_local_sources(targets_info: list[dict[str, Any]]) -> list[dict[str,
{
"source_path": details["target_path"],
"workspace_subdir": workspace_subdir,
"mount": bool(details.get("mount", False)),
}
)

Expand All @@ -1205,12 +1210,126 @@ def collect_local_sources(targets_info: list[dict[str, Any]]) -> list[dict[str,
{
"source_path": details["cloned_repo_path"],
"workspace_subdir": workspace_subdir,
"mount": False,
}
)

return local_sources


def directory_size_bytes(path: Path) -> int:
    """Sum the sizes, in bytes, of the non-symlink files found under ``path``.

    The tree is walked without following directory symlinks, and file entries
    that are themselves symlinks contribute nothing. This is a stat-only
    estimate of how much data would have to be streamed into the sandbox.

    The result is best-effort: a file that vanishes or cannot be stat'd while
    the walk is in progress is counted as zero bytes, and a directory that
    cannot be listed is reported through a warning log and its contents are
    left out of the total.
    """

    def _report_unlistable(exc: OSError) -> None:
        # os.walk hands listing failures here instead of raising them.
        logger.warning("Could not read %s while measuring size: %s", exc.filename, exc)

    def _countable_size(candidate: str) -> int:
        # Symlinks and unreadable/vanished files contribute nothing.
        try:
            if os.path.islink(candidate):  # noqa: PTH114
                return 0
            return os.path.getsize(candidate)  # noqa: PTH202
        except OSError:
            return 0

    return sum(
        _countable_size(os.path.join(parent, entry))  # noqa: PTH118
        for parent, _subdirs, entries in os.walk(
            path, followlinks=False, onerror=_report_unlistable
        )
        for entry in entries
    )


def find_oversized_local_targets(
    targets_info: list[dict[str, Any]], max_bytes: int
) -> list[tuple[str, int]]:
    """List the copied local targets whose on-disk size exceeds ``max_bytes``.

    Only ``local_code`` targets that carry a ``target_path`` and are not
    flagged ``mount`` are measured; bind-mounted targets are never copied, so
    their size does not matter here. Each hit is reported as a
    ``(path, size_bytes)`` tuple, in input order.

    Passing ``max_bytes <= 0`` turns the check off and yields an empty list
    without measuring anything.
    """
    too_big: list[tuple[str, int]] = []
    if max_bytes > 0:
        for entry in targets_info:
            if entry.get("type") == "local_code":
                info = entry.get("details") or {}
                local_path = info.get("target_path")
                if local_path and not info.get("mount"):
                    measured = directory_size_bytes(Path(local_path))
                    if measured > max_bytes:
                        too_big.append((local_path, measured))
    return too_big


def build_mount_targets_info(mount_paths: list[str]) -> list[dict[str, Any]]:
    """Build ``targets_info`` entries for ``--mount`` directories.

    Each path is user-expanded and resolved, and must name an existing local
    directory. The resulting entry is a ``local_code`` target flagged
    ``mount: True``, with the resolved path used for both ``target_path`` and
    ``original``.

    Args:
        mount_paths: Raw ``--mount`` values as given on the command line.

    Returns:
        One ``targets_info`` entry per input path, in input order.

    Raises:
        ValueError: If a path is empty or whitespace-only, cannot be expanded
            or resolved (including an unknown ``~user`` prefix), or does not
            refer to an existing directory.
    """
    targets_info: list[dict[str, Any]] = []
    for raw in mount_paths:
        if not raw or not raw.strip():
            raise ValueError("--mount path must not be empty.")
        try:
            # expanduser() raises RuntimeError when the home directory of a
            # ``~user`` prefix cannot be determined, so it has to sit inside
            # the try for that failure to surface as a ValueError as well.
            resolved = Path(raw).expanduser().resolve()
            is_dir = resolved.is_dir()
        except (OSError, RuntimeError) as e:
            raise ValueError(f"Invalid mount path '{raw}': {e!s}") from e
        if not is_dir:
            raise ValueError(
                f"Mount path '{raw}' is not an existing directory. "
                "--mount requires a path to a local directory."
            )
        targets_info.append(
            {
                "type": "local_code",
                "details": {"target_path": str(resolved), "mount": True},
                "original": str(resolved),
            }
        )
    return targets_info


def dedupe_local_targets(targets_info: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Merge ``local_code`` targets that share the same ``target_path``.

    The first occurrence of a path fixes its position in the output. A later
    duplicate is dropped, unless it is a bind mount and the entry already kept
    is not, in which case it takes over that position. This way a directory
    given both as a copied target and as a mount ends up mounted only.
    Targets that are not ``local_code``, or that have no ``target_path``, are
    passed through unchanged and in order.
    """
    deduped: list[dict[str, Any]] = []
    slot_for_path: dict[str, int] = {}
    for entry in targets_info:
        info = entry.get("details") or {}
        local_path = info.get("target_path")
        if not (entry.get("type") == "local_code" and local_path):
            deduped.append(entry)
            continue
        if local_path not in slot_for_path:
            slot_for_path[local_path] = len(deduped)
            deduped.append(entry)
            continue
        slot = slot_for_path[local_path]
        kept_info = deduped[slot].get("details") or {}
        if info.get("mount") and not kept_info.get("mount"):
            # A bind mount wins over a previously kept copied entry.
            deduped[slot] = entry
    return deduped


def _is_localhost_host(host: str) -> bool:
host_lower = host.lower().strip("[]")

Expand Down
2 changes: 1 addition & 1 deletion strix/report/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def read_run_record(run_dir: Path) -> dict[str, Any]:
except (OSError, json.JSONDecodeError) as exc:
raise RuntimeError(f"run.json at {path} is unreadable: {exc}") from exc
if not isinstance(data, dict):
raise RuntimeError(f"run.json at {path} is not an object")
raise TypeError(f"run.json at {path} is not an object")
return data


Expand Down
Loading