diff --git a/agents/README.md b/agents/README.md
deleted file mode 100644
index 94bf49b..0000000
--- a/agents/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
----
-name: rune-agents-readme
-description: Documentation for Rune agents.
----
-
-# Rune Agents
-
-This directory contains the agent specifications for Rune:
-
-- **Scribe**: For capturing organizational context.
-- **Retriever**: For searching and synthesizing organizational context.
-- **Agent adapters**: `claude/`, `gemini/`, and `codex/` contain client-specific prompt variants where needed.
diff --git a/agents/SLACK_SETUP.md b/agents/SLACK_SETUP.md
deleted file mode 100644
index d2bf3a5..0000000
--- a/agents/SLACK_SETUP.md
+++ /dev/null
@@ -1,9 +0,0 @@
----
-name: rune-slack-setup
-description: Instructions for setting up the Rune Slack app.
----
-
-# Slack Setup Guide
-
-To set up the Rune Slack app, follow these steps:
-...
diff --git a/agents/__init__.py b/agents/__init__.py
deleted file mode 100644
index 6aba734..0000000
--- a/agents/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-Rune Agents
-
-Working implementations of Scribe (context capture) and Retriever (context retrieval).
-
-Philosophy:
-- All memory is reproducible from payload.text (Markdown)
-- Evidence-based reasoning: "Why" cannot be confirmed without quotes
-- On-device similarity search for decision detection
-- Text-only storage (no binary data)
-
-Usage:
-    from agents.common import load_config, EmbeddingService, PatternCache
-    from agents.common.schemas import DecisionRecord, render_payload_text
-    from agents.scribe import DecisionDetector, RecordBuilder
-    from agents.retriever import Searcher, Synthesizer
-"""
-
-__version__ = "0.3.1"
diff --git a/agents/common/__init__.py b/agents/common/__init__.py
deleted file mode 100644
index 108832e..0000000
--- a/agents/common/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-Rune Agents Common Module
-
-Shared infrastructure for Scribe and Retriever agents.
-"""
-
-from .config import RuneConfig, load_config
-from .embedding_service import EmbeddingService
-from .envector_client import EnVectorClient
-from .pattern_cache import PatternCache, PatternEntry
-
-__all__ = [
-    "RuneConfig",
-    "load_config",
-    "EmbeddingService",
-    "EnVectorClient",
-    "PatternCache",
-    "PatternEntry",
-]
diff --git a/agents/common/config.py b/agents/common/config.py
deleted file mode 100644
index 2be3a1f..0000000
--- a/agents/common/config.py
+++ /dev/null
@@ -1,365 +0,0 @@
-"""
-Configuration Management for Rune Agents
-
-Loads configuration from ~/.rune/config.json and environment variables.
-"""
-
-import os
-import json
-from pathlib import Path
-from dataclasses import dataclass, field
-
-# Default config paths
-CONFIG_DIR = Path.home() / ".rune"
-CONFIG_PATH = CONFIG_DIR / "config.json"
-LOGS_DIR = CONFIG_DIR / "logs"
-KEYS_DIR = CONFIG_DIR / "keys"
-REVIEW_QUEUE_PATH = CONFIG_DIR / "review_queue.json"
-CAPTURE_LOG_PATH = CONFIG_DIR / "capture_log.jsonl"
-
-# Project paths (relative to this file)
-PROJECT_ROOT = Path(__file__).parent.parent.parent  # rune/
-PATTERNS_DIR = PROJECT_ROOT / "patterns"
-MCP_SERVER_DIR = PROJECT_ROOT / "mcp" / "server"
-
-
-@dataclass
-class VaultConfig:
-    """Rune-Vault configuration"""
-    endpoint: str = ""
-    token: str = ""
-    ca_cert: str = ""        # Path to CA cert PEM. Empty = system CA.
-    tls_disable: bool = False
-
-
-@dataclass
-class EmbeddingConfig:
-    """Embedding model configuration"""
-    mode: str = "sbert"  # sentence-transformers (on-device)
-    model: str = "Qwen/Qwen3-Embedding-0.6B"
-
-
-@dataclass
-class LLMConfig:
-    """Shared LLM provider configuration across all agents"""
-    provider: str = "anthropic"
-    tier2_provider: str = "anthropic"
-    anthropic_api_key: str = ""
-    anthropic_model: str = "claude-sonnet-4-20250514"
-    openai_api_key: str = ""
-    openai_model: str = "gpt-4o-mini"
-    openai_tier2_model: str = ""
-    google_api_key: str = ""
-    google_model: str = "gemini-2.0-flash-exp"
-    google_tier2_model: str = ""
-
-
-@dataclass
-class EnVectorConfig:
-    """enVector Cloud credentials (cached from Vault bundle)"""
-    endpoint: str = ""
-    api_key: str = ""
-
-
-@dataclass
-class ScribeConfig:
-    """Scribe agent configuration"""
-    slack_webhook_port: int = 8080
-    similarity_threshold: float = 0.35  # Tier 1: wider net (Tier 2 LLM handles precision)
-    auto_capture_threshold: float = 0.7
-    tier2_enabled: bool = False  # Legacy: only enable if API keys configured
-    tier2_model: str = "claude-haiku-4-5-20251001"
-    patterns_path: str = str(PATTERNS_DIR / "capture-triggers.md")
-    slack_signing_secret: str = ""
-    notion_signing_secret: str = ""
-
-
-@dataclass
-class RetrieverConfig:
-    """Retriever agent configuration"""
-    topk: int = 10
-    confidence_threshold: float = 0.5
-
-
-@dataclass
-class RuneConfig:
-    """Main Rune configuration"""
-    vault: VaultConfig = field(default_factory=VaultConfig)
-    envector: EnVectorConfig = field(default_factory=EnVectorConfig)
-    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
-    llm: LLMConfig = field(default_factory=LLMConfig)
-    scribe: ScribeConfig = field(default_factory=ScribeConfig)
-    retriever: RetrieverConfig = field(default_factory=RetrieverConfig)
-    state: str = "dormant"  # "active" or "dormant"
-    dormant_reason: str = ""  # raeson why plugin entered dormant state (e.g., "vault_unreachable", "user_deactivated")
-    dormant_since: str = ""   # Timestamp of when dormant state was entered
-    _env_sourced_keys: set = field(default_factory=set, repr=False)
-
-
-def _parse_vault_config(data: dict) -> VaultConfig:
-    """Parse vault section from config dict"""
-    vault_data = data.get("vault", {})
-    return VaultConfig(
-        endpoint=vault_data.get("endpoint") or vault_data.get("url", ""),
-        token=vault_data.get("token", ""),
-        ca_cert=vault_data.get("ca_cert", ""),
-        tls_disable=vault_data.get("tls_disable", False),
-    )
-
-
-def _parse_embedding_config(data: dict) -> EmbeddingConfig:
-    """Parse embedding section from config dict"""
-    embedding_data = data.get("embedding", {})
-    return EmbeddingConfig(
-        mode=embedding_data.get("mode", "sbert"),
-        model=embedding_data.get("model", "Qwen/Qwen3-Embedding-0.6B"),
-    )
-
-
-def _parse_scribe_config(data: dict) -> ScribeConfig:
-    """Parse scribe section from config dict"""
-    scribe_data = data.get("scribe", {})
-    return ScribeConfig(
-        slack_webhook_port=scribe_data.get("slack_webhook_port", 8080),
-        similarity_threshold=scribe_data.get("similarity_threshold", 0.35),
-        auto_capture_threshold=scribe_data.get("auto_capture_threshold", 0.7),
-        tier2_enabled=scribe_data.get("tier2_enabled", False),
-        tier2_model=scribe_data.get("tier2_model", "claude-haiku-4-5-20251001"),
-        patterns_path=scribe_data.get("patterns_path", str(PATTERNS_DIR / "capture-triggers.md")),
-        slack_signing_secret=scribe_data.get("slack_signing_secret", ""),
-        notion_signing_secret=scribe_data.get("notion_signing_secret", ""),
-    )
-
-
-def _parse_retriever_config(data: dict) -> RetrieverConfig:
-    """Parse retriever section from config dict (non-LLM fields only)"""
-    retriever_data = data.get("retriever", {})
-    return RetrieverConfig(
-        topk=retriever_data.get("topk", 10),
-        confidence_threshold=retriever_data.get("confidence_threshold", 0.5),
-    )
-
-
-def _parse_envector_config(data: dict) -> EnVectorConfig:
-    """Parse envector section from config dict"""
-    ev_data = data.get("envector", {})
-    return EnVectorConfig(
-        endpoint=ev_data.get("endpoint", ""),
-        api_key=ev_data.get("api_key", ""),
-    )
-
-
-def _parse_llm_config(data: dict) -> LLMConfig:
-    """Parse LLM configuration with backward-compatible migration.
-
-    Reads from ``data["llm"]`` first. If that section is absent, falls back
-    to reading LLM-specific keys from ``data["retriever"]`` and
-    ``data["scribe"]["tier2_provider"]`` for backward compatibility with
-    configs written before the ``llm`` section existed.
-    """
-    llm_data = data.get("llm")
-
-    if llm_data is not None:
-        # New-style config: read directly from llm section
-        return LLMConfig(
-            provider=llm_data.get("provider", "anthropic"),
-            tier2_provider=llm_data.get("tier2_provider", "anthropic"),
-            anthropic_api_key=llm_data.get("anthropic_api_key", ""),
-            anthropic_model=llm_data.get("anthropic_model", "claude-sonnet-4-20250514"),
-            openai_api_key=llm_data.get("openai_api_key", ""),
-            openai_model=llm_data.get("openai_model", "gpt-4o-mini"),
-            openai_tier2_model=llm_data.get("openai_tier2_model", ""),
-            google_api_key=llm_data.get("google_api_key", ""),
-            google_model=llm_data.get("google_model", "gemini-2.0-flash-exp"),
-            google_tier2_model=llm_data.get("google_tier2_model", ""),
-        )
-
-    # Migration: fall back to retriever + scribe fields
-    retriever_data = data.get("retriever", {})
-    scribe_data = data.get("scribe", {})
-
-    return LLMConfig(
-        provider=retriever_data.get("llm_provider", "anthropic"),
-        tier2_provider=scribe_data.get("tier2_provider", "anthropic"),
-        anthropic_api_key=retriever_data.get("anthropic_api_key", ""),
-        anthropic_model=retriever_data.get("anthropic_model", "claude-sonnet-4-20250514"),
-        openai_api_key=retriever_data.get("openai_api_key", ""),
-        openai_model=retriever_data.get("openai_model", "gpt-4o-mini"),
-        openai_tier2_model="",
-        google_api_key=retriever_data.get("google_api_key", ""),
-        google_model=retriever_data.get("google_model", "gemini-2.0-flash-exp"),
-        google_tier2_model="",
-    )
-
-
-def load_config() -> RuneConfig:
-    """
-    Load configuration from file and environment variables.
-
-    Vault credentials are loaded from ~/.rune/config.json.
-    enVector credentials are cached in config.json (populated from Vault bundle
-    during pipeline initialization).
-    Other settings (embedding, scribe, LLM keys) can be overridden via
-    environment variables.
-
-    Priority (highest to lowest):
-    1. Environment variables
-    2. Config file (~/.rune/config.json)
-    3. Default values
-    """
-    config = RuneConfig()
-
-    # Load from config file if exists
-    if CONFIG_PATH.exists():
-        try:
-            with open(CONFIG_PATH) as f:
-                data = json.load(f)
-
-            config.vault = _parse_vault_config(data)
-            config.envector = _parse_envector_config(data)
-            config.embedding = _parse_embedding_config(data)
-            config.llm = _parse_llm_config(data)
-            config.scribe = _parse_scribe_config(data)
-            config.retriever = _parse_retriever_config(data)
-            config.state = data.get("state", "dormant")
-            config.dormant_reason = data.get("dormant_reason", "")
-            config.dormant_since = data.get("dormant_since", "")
-        except (json.JSONDecodeError, IOError) as e:
-            print(f"[Config] Warning: Failed to load config file: {e}")
-
-    # Environment variable overrides
-    if os.getenv("EMBEDDING_MODE"):
-        config.embedding.mode = os.getenv("EMBEDDING_MODE")
-    if os.getenv("EMBEDDING_MODEL"):
-        config.embedding.model = os.getenv("EMBEDDING_MODEL")
-
-    if os.getenv("SCRIBE_PORT"):
-        try:
-            config.scribe.slack_webhook_port = int(os.getenv("SCRIBE_PORT"))
-        except ValueError:
-            print(f"[Config] Warning: invalid SCRIBE_PORT value: {os.getenv('SCRIBE_PORT')}")
-    if os.getenv("SCRIBE_THRESHOLD"):
-        try:
-            config.scribe.similarity_threshold = float(os.getenv("SCRIBE_THRESHOLD"))
-        except ValueError:
-            print(f"[Config] Warning: invalid SCRIBE_THRESHOLD value: {os.getenv('SCRIBE_THRESHOLD')}")
-    if os.getenv("SCRIBE_AUTO_THRESHOLD"):
-        try:
-            config.scribe.auto_capture_threshold = float(os.getenv("SCRIBE_AUTO_THRESHOLD"))
-        except ValueError:
-            print(f"[Config] Warning: invalid SCRIBE_AUTO_THRESHOLD value: {os.getenv('SCRIBE_AUTO_THRESHOLD')}")
-    if os.getenv("SLACK_SIGNING_SECRET"):
-        config.scribe.slack_signing_secret = os.getenv("SLACK_SIGNING_SECRET")
-    if os.getenv("NOTION_SIGNING_SECRET"):
-        config.scribe.notion_signing_secret = os.getenv("NOTION_SIGNING_SECRET")
-
-    # LLM env var overrides (target config.llm, track env-sourced keys)
-    _env_llm_map = {
-        "ANTHROPIC_API_KEY": "anthropic_api_key",
-        "ANTHROPIC_MODEL": "anthropic_model",
-        "OPENAI_API_KEY": "openai_api_key",
-        "OPENAI_MODEL": "openai_model",
-        "GOOGLE_API_KEY": "google_api_key",
-        "GEMINI_API_KEY": "google_api_key",
-        "GOOGLE_MODEL": "google_model",
-        "RUNE_LLM_PROVIDER": "provider",
-        "RUNE_TIER2_LLM_PROVIDER": "tier2_provider",
-    }
-    for env_var, attr in _env_llm_map.items():
-        val = os.getenv(env_var)
-        if val:
-            setattr(config.llm, attr, val)
-            config._env_sourced_keys.add(attr)
-
-    if os.getenv("RUNE_STATE"):
-        config.state = os.getenv("RUNE_STATE")
-
-    return config
-
-
-def save_config(config: RuneConfig) -> None:
-    """Save configuration to file.
-
-    API key fields that were sourced from environment variables are written
-    as empty strings so that secrets are not persisted to disk.
-    """
-    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
-    os.chmod(str(CONFIG_DIR), 0o700)  # Force 700 regardless of umask
-
-    env_sourced = getattr(config, "_env_sourced_keys", set())
-
-    # Build llm section, blanking out env-sourced API key fields
-    _llm_api_key_fields = {
-        "anthropic_api_key", "openai_api_key", "google_api_key",
-    }
-    llm_section = {
-        "provider": config.llm.provider,
-        "tier2_provider": config.llm.tier2_provider,
-        "anthropic_api_key": config.llm.anthropic_api_key,
-        "anthropic_model": config.llm.anthropic_model,
-        "openai_api_key": config.llm.openai_api_key,
-        "openai_model": config.llm.openai_model,
-        "openai_tier2_model": config.llm.openai_tier2_model,
-        "google_api_key": config.llm.google_api_key,
-        "google_model": config.llm.google_model,
-        "google_tier2_model": config.llm.google_tier2_model,
-    }
-    for key in _llm_api_key_fields:
-        if key in env_sourced:
-            llm_section[key] = ""
-
-    data = {
-        "vault": {
-            "endpoint": config.vault.endpoint,
-            "token": config.vault.token,
-            "ca_cert": config.vault.ca_cert,
-            "tls_disable": config.vault.tls_disable,
-        },
-        "envector": {
-            "endpoint": config.envector.endpoint,
-            "api_key": config.envector.api_key,
-        },
-        "embedding": {
-            "mode": config.embedding.mode,
-            "model": config.embedding.model,
-        },
-        "llm": llm_section,
-        "scribe": {
-            "slack_webhook_port": config.scribe.slack_webhook_port,
-            "similarity_threshold": config.scribe.similarity_threshold,
-            "auto_capture_threshold": config.scribe.auto_capture_threshold,
-            "tier2_enabled": config.scribe.tier2_enabled,
-            "tier2_model": config.scribe.tier2_model,
-            "patterns_path": config.scribe.patterns_path,
-            "slack_signing_secret": config.scribe.slack_signing_secret,
-            "notion_signing_secret": config.scribe.notion_signing_secret,
-        },
-        "retriever": {
-            "topk": config.retriever.topk,
-            "confidence_threshold": config.retriever.confidence_threshold,
-        },
-        "state": config.state,
-    }
-
-    # Include dormant metadata
-    if config.state == "dormant":
-        if config.dormant_reason:
-            data["dormant_reason"] = config.dormant_reason
-        if config.dormant_since:
-            data["dormant_since"] = config.dormant_since
-
-    with open(CONFIG_PATH, "w") as f:
-        json.dump(data, f, indent=2)
-
-    # Set secure permissions
-    CONFIG_PATH.chmod(0o600)
-
-
-def ensure_directories() -> None:
-    """Ensure required directories exist with secure permissions"""
-    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
-    os.chmod(str(CONFIG_DIR), 0o700)
-    LOGS_DIR.mkdir(parents=True, exist_ok=True)
-    os.chmod(str(LOGS_DIR), 0o700)
-    KEYS_DIR.mkdir(parents=True, exist_ok=True)
-    os.chmod(str(KEYS_DIR), 0o700)
diff --git a/agents/common/embedding_service.py b/agents/common/embedding_service.py
deleted file mode 100644
index 1bc9d38..0000000
--- a/agents/common/embedding_service.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-Embedding Service
-
-Wraps the existing EmbeddingAdapter from mcp/adapter.
-Provides on-device embedding generation using fastembed.
-"""
-
-import logging
-import sys
-from pathlib import Path
-from typing import List, Optional
-import numpy as np
-
-logger = logging.getLogger("rune.common.embedding")
-
-# Add mcp/ to path so `from adapter import ...` works
-MCP_ROOT = Path(__file__).parent.parent.parent / "mcp"
-if str(MCP_ROOT) not in sys.path:
-    sys.path.insert(0, str(MCP_ROOT))
-
-
-class EmbeddingService:
-    """
-    Singleton embedding service for Rune agents.
-
-    Uses fastembed by default for on-device embedding generation.
-    This avoids external API calls and keeps data local.
-    """
-
-    _instance: Optional["EmbeddingService"] = None
-    _adapter = None
-
-    def __new__(cls, mode: str = "femb", model: str = "Qwen/Qwen3-Embedding-0.6B"):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
-            cls._instance._init_adapter(mode, model)
-        elif cls._instance._model != model or cls._instance._mode != mode:
-            cls._instance._init_adapter(mode, model)
-        return cls._instance
-
-    def _init_adapter(self, mode: str, model: str) -> None:
-        """Initialize the underlying EmbeddingAdapter"""
-        try:
-            from adapter.embeddings import EmbeddingAdapter
-            self._adapter = EmbeddingAdapter(mode=mode, model_name=model)
-            self._mode = mode
-            self._model = model
-            logger.info("Initialized with mode=%s, model=%s", mode, model)
-        except ImportError as e:
-            logger.warning("Could not import EmbeddingAdapter: %s", e)
-            logger.warning("Using fallback mode (no embeddings)")
-            self._adapter = None
-
-    @property
-    def is_available(self) -> bool:
-        """Check if embedding service is available"""
-        return self._adapter is not None
-
-    def embed(self, texts: List[str]) -> List[List[float]]:
-        """
-        Generate embeddings for a list of texts.
-
-        Args:
-            texts: List of strings to embed
-
-        Returns:
-            List of embedding vectors (L2 normalized)
-        """
-        if not self._adapter:
-            raise RuntimeError("EmbeddingAdapter not initialized")
-
-        if not texts:
-            return []
-
-        embeddings = self._adapter.get_embedding(texts)
-
-        # Ensure consistent return type
-        if isinstance(embeddings, np.ndarray):
-            return embeddings.tolist()
-        return embeddings
-
-    def embed_single(self, text: str) -> List[float]:
-        """
-        Generate embedding for a single text.
-
-        Args:
-            text: String to embed
-
-        Returns:
-            Embedding vector (L2 normalized)
-        """
-        if not text:
-            raise ValueError("Cannot embed empty text")
-
-        embeddings = self.embed([text])
-        return embeddings[0]
-
-    def cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
-        """
-        Compute cosine similarity between two vectors.
-
-        Note: EmbeddingAdapter already L2 normalizes vectors,
-        so dot product equals cosine similarity.
-
-        Args:
-            vec1: First embedding vector
-            vec2: Second embedding vector
-
-        Returns:
-            Cosine similarity score (0.0 to 1.0)
-        """
-        v1 = np.array(vec1)
-        v2 = np.array(vec2)
-
-        # Handle potential dimension mismatch
-        if v1.shape != v2.shape:
-            raise ValueError(f"Vector dimension mismatch: {v1.shape} vs {v2.shape}")
-
-        # For normalized vectors, dot product = cosine similarity
-        similarity = float(np.dot(v1, v2))
-
-        # Clamp to valid range (numerical precision issues)
-        return max(0.0, min(1.0, similarity))
-
-    def batch_cosine_similarity(
-        self,
-        query_vec: List[float],
-        vectors: List[List[float]]
-    ) -> List[float]:
-        """
-        Compute cosine similarity between a query and multiple vectors.
-
-        Args:
-            query_vec: Query embedding vector
-            vectors: List of embedding vectors to compare against
-
-        Returns:
-            List of similarity scores
-        """
-        if not vectors:
-            return []
-
-        query = np.array(query_vec)
-        matrix = np.array(vectors)
-
-        # Matrix multiplication for batch similarity
-        similarities = np.dot(matrix, query)
-
-        # Clamp to valid range
-        similarities = np.clip(similarities, 0.0, 1.0)
-
-        return similarities.tolist()
-
-
-# Module-level singleton getter
-_service_instance: Optional[EmbeddingService] = None
-
-
-def get_embedding_service(
-    mode: str = "sbert",
-    model: str = "Qwen/Qwen3-Embedding-0.6B"
-) -> EmbeddingService:
-    """
-    Get the singleton EmbeddingService instance.
-
-    Args:
-        mode: Embedding mode (femb, sbert, hf, openai)
-        model: Model name
-
-    Returns:
-        EmbeddingService instance
-    """
-    global _service_instance
-
-    if _service_instance is None:
-        _service_instance = EmbeddingService(mode=mode, model=model)
-
-    return _service_instance
diff --git a/agents/common/envector_client.py b/agents/common/envector_client.py
deleted file mode 100644
index 9afda07..0000000
--- a/agents/common/envector_client.py
+++ /dev/null
@@ -1,220 +0,0 @@
-"""
-EnVector Client
-
-Wraps EnVectorSDKAdapter for direct access to enVector operations.
-Avoids MCP protocol overhead by importing adapters directly.
-"""
-
-import json
-import logging
-import sys
-from pathlib import Path
-from typing import List, Dict, Any, Optional
-
-logger = logging.getLogger("rune.common.envector")
-
-# Add mcp/ to path so `from adapter import ...` works
-MCP_ROOT = Path(__file__).parent.parent.parent / "mcp"
-if str(MCP_ROOT) not in sys.path:
-    sys.path.insert(0, str(MCP_ROOT))
-
-
-class EnVectorClient:
-    """
-    Direct client to enVector operations.
-
-    Uses direct import of EnVectorSDKAdapter instead of MCP protocol
-    for lower overhead when running on the same machine.
-    """
-
-    def __init__(
-        self,
-        address: str = "localhost:50050",
-        key_path: str = "~/.rune/keys",
-        key_id: str = None,
-        access_token: Optional[str] = None,
-        auto_key_setup: bool = True,
-        agent_id: Optional[str] = None,
-        agent_dek: Optional[bytes] = None,
-    ):
-        """
-        Initialize EnVector client.
-
-        Args:
-            address: enVector server address (host:port or cloud URL)
-            key_path: Path to store/load encryption keys
-            key_id: Key identifier
-            access_token: Cloud access token (for enVector Cloud)
-            auto_key_setup: Auto-generate keys if not found
-            agent_id: Per-agent identifier for app-layer metadata encryption
-            agent_dek: Per-agent AES-256 DEK (32 bytes) from Vault
-        """
-        self._address = address
-        self._key_path = Path(key_path).expanduser()
-        self._key_id = key_id
-        self._access_token = access_token
-        self._auto_key_setup = auto_key_setup
-        self._agent_id = agent_id
-        self._agent_dek = agent_dek
-        self._adapter = None
-        self._initialized = False
-
-    def _ensure_initialized(self) -> None:
-        """Lazily initialize the adapter"""
-        if self._initialized:
-            return
-
-        try:
-            from adapter.envector_sdk import EnVectorSDKAdapter
-
-            # Ensure key directory exists
-            self._key_path.mkdir(parents=True, exist_ok=True)
-
-            self._adapter = EnVectorSDKAdapter(
-                address=self._address,
-                key_id=self._key_id,
-                key_path=str(self._key_path),
-                eval_mode="rmp",
-                query_encryption=False,  # Plain queries for simplicity
-                access_token=self._access_token,
-                auto_key_setup=self._auto_key_setup,
-                agent_id=self._agent_id,
-                agent_dek=self._agent_dek,
-            )
-            self._initialized = True
-            logger.info("Connected to %s", self._address)
-
-        except ImportError as e:
-            logger.warning("Could not import EnVectorSDKAdapter: %s", e)
-            raise RuntimeError(f"EnVectorSDKAdapter not available: {e}")
-        except Exception as e:
-            logger.error("Error initializing: %s", e)
-            raise
-
-    @property
-    def is_available(self) -> bool:
-        """Check if client is available"""
-        try:
-            self._ensure_initialized()
-            return self._adapter is not None
-        except Exception:
-            return False
-
-    def get_index_list(self) -> Dict[str, Any]:
-        """Get list of all indexes"""
-        self._ensure_initialized()
-        return self._adapter.call_get_index_list()
-
-    def insert(
-        self,
-        index_name: str,
-        vectors: List[List[float]],
-        metadata: Optional[List[Dict]] = None
-    ) -> Dict[str, Any]:
-        """
-        Insert vectors into an index.
-
-        Args:
-            index_name: Target index name
-            vectors: List of embedding vectors
-            metadata: Optional list of metadata dicts (one per vector)
-
-        Returns:
-            Result dict with ok/error status
-        """
-        self._ensure_initialized()
-
-        if metadata:
-            # Serialize metadata to JSON strings
-            meta_list = [
-                json.dumps(m) if isinstance(m, dict) else str(m)
-                for m in metadata
-            ]
-        else:
-            meta_list = [json.dumps({"index": i}) for i in range(len(vectors))]
-
-        return self._adapter.call_insert(
-            index_name=index_name,
-            vectors=vectors,
-            metadata=meta_list
-        )
-
-    def insert_with_text(
-        self,
-        index_name: str,
-        texts: List[str],
-        embedding_service,
-        metadata: Optional[List[Dict]] = None
-    ) -> Dict[str, Any]:
-        """
-        Embed texts and insert into index.
-
-        Args:
-            index_name: Target index name
-            texts: List of texts to embed
-            embedding_service: EmbeddingService instance
-            metadata: Optional list of metadata dicts
-
-        Returns:
-            Result dict with ok/error status
-        """
-        # Generate embeddings
-        vectors = embedding_service.embed(texts)
-
-        # Add text to metadata if not provided
-        if metadata is None:
-            metadata = [{"text": t} for t in texts]
-        else:
-            for i, meta in enumerate(metadata):
-                if "text" not in meta:
-                    meta["text"] = texts[i]
-
-        return self.insert(index_name, vectors, metadata)
-
-    def score(
-        self,
-        index_name: str,
-        query_vector: List[float],
-    ) -> Dict[str, Any]:
-        """
-        Encrypted similarity scoring (Vault-secured pipeline step 1).
-
-        Returns encrypted score blobs for Vault decryption.
-
-        Args:
-            index_name: Index to score against
-            query_vector: Query embedding vector
-
-        Returns:
-            Result dict with encrypted_blobs list
-        """
-        self._ensure_initialized()
-        return self._adapter.call_score(
-            index_name=index_name,
-            query=[query_vector],
-        )
-
-    def remind(
-        self,
-        index_name: str,
-        indices: List[Dict[str, Any]],
-        output_fields: Optional[List[str]] = None,
-    ) -> Dict[str, Any]:
-        """
-        Retrieve metadata for indices returned by Vault (Vault-secured pipeline step 3).
-
-        Args:
-            index_name: Index to fetch metadata from
-            indices: List of dicts with shard_idx, row_idx, score
-            output_fields: Fields to include (default: ["metadata"])
-
-        Returns:
-            Result dict with metadata entries
-        """
-        self._ensure_initialized()
-        return self._adapter.call_remind(
-            index_name=index_name,
-            indices=indices,
-            output_fields=output_fields,
-        )
-
diff --git a/agents/common/language.py b/agents/common/language.py
deleted file mode 100644
index d6d2597..0000000
--- a/agents/common/language.py
+++ /dev/null
@@ -1,172 +0,0 @@
-"""
-Language Detection Service
-
-Automatic per-message language detection using langdetect + Unicode script fallback.
-Used to route messages to LLM extraction (non-English) or regex extraction (English).
-"""
-
-import re
-import unicodedata
-from dataclasses import dataclass
-from typing import Optional
-
-# Matches any Hangul, Kana, or CJK character
-_NON_LATIN_RE = re.compile(
-    r'[\u1100-\u11FF\u3040-\u309F\u30A0-\u30FF\u3130-\u318F'
-    r'\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF]'
-)
-
-# Seed langdetect for deterministic results
-try:
-    from langdetect import DetectorFactory
-    DetectorFactory.seed = 0
-except ImportError:
-    pass
-
-
-@dataclass(frozen=True)
-class LanguageInfo:
-    """Detected language information"""
-    code: str           # ISO 639-1: "en", "ko", "ja"
-    confidence: float   # 0.0~1.0
-    script: str         # "Latin", "Hangul", "CJK", "Kana", "Mixed"
-
-    @property
-    def is_english(self) -> bool:
-        return self.code == "en"
-
-    @property
-    def needs_llm_extraction(self) -> bool:
-        """Non-English text needs LLM extraction path"""
-        return not self.is_english
-
-
-# Unicode range based script detection
-_SCRIPT_RANGES = [
-    (0xAC00, 0xD7AF, "Hangul", "ko"),    # Hangul Syllables
-    (0x1100, 0x11FF, "Hangul", "ko"),    # Hangul Jamo
-    (0x3130, 0x318F, "Hangul", "ko"),    # Hangul Compatibility Jamo
-    (0x3040, 0x309F, "Kana", "ja"),      # Hiragana
-    (0x30A0, 0x30FF, "Kana", "ja"),      # Katakana
-    (0x4E00, 0x9FFF, "CJK", "zh"),      # CJK Unified Ideographs
-    (0x3400, 0x4DBF, "CJK", "zh"),      # CJK Extension A
-]
-
-
-def _detect_script(text: str) -> tuple[str, Optional[str]]:
-    """Detect dominant script from Unicode character ranges.
-
-    Returns:
-        (script_name, language_code) or ("Latin", None) for ASCII-dominant text
-    """
-    script_counts: dict[str, int] = {}
-    lang_counts: dict[str, int] = {}
-    total = 0
-
-    for ch in text:
-        if ch.isspace() or ch in '.,!?;:"\'-()[]{}':
-            continue
-        total += 1
-        cp = ord(ch)
-        matched = False
-        for start, end, script, lang in _SCRIPT_RANGES:
-            if start <= cp <= end:
-                script_counts[script] = script_counts.get(script, 0) + 1
-                lang_counts[lang] = lang_counts.get(lang, 0) + 1
-                matched = True
-                break
-        if not matched:
-            script_counts["Latin"] = script_counts.get("Latin", 0) + 1
-
-    if total == 0:
-        return "Latin", None
-
-    # Find dominant script
-    dominant_script = max(script_counts, key=script_counts.get)
-    dominant_count = script_counts[dominant_script]
-
-    # If multiple scripts are significant, mark as Mixed
-    non_latin_scripts = {k: v for k, v in script_counts.items() if k != "Latin"}
-    if len(non_latin_scripts) > 1:
-        top_two = sorted(non_latin_scripts.values(), reverse=True)
-        if len(top_two) >= 2 and top_two[1] > total * 0.2:
-            # Japanese text often mixes CJK + Kana
-            if "Kana" in non_latin_scripts and "CJK" in non_latin_scripts:
-                return "Kana", "ja"
-            return "Mixed", None
-
-    # Determine language from dominant non-Latin script
-    if dominant_script != "Latin" and dominant_count > total * 0.15:
-        # Find the language for this script
-        for lang, count in lang_counts.items():
-            if count == max(lang_counts.values()):
-                # Special case: CJK with Kana = Japanese
-                if "Kana" in script_counts and script_counts.get("Kana", 0) > 0:
-                    return "Kana", "ja"
-                return dominant_script, lang
-
-    return "Latin", None
-
-
-def detect_language(text: str) -> LanguageInfo:
-    """Detect language of input text.
-
-    Uses langdetect library with Unicode script-based fallback.
-    Short texts (<10 chars) default to English.
-
-    Args:
-        text: Input text to detect language for
-
-    Returns:
-        LanguageInfo with detected language code, confidence, and script
-    """
-    if not text or not text.strip():
-        return LanguageInfo(code="en", confidence=1.0, script="Latin")
-
-    cleaned = text.strip()
-
-    # Very short text defaults to English
-    if len(cleaned) < 10:
-        # But check for obvious non-Latin scripts
-        script, lang = _detect_script(cleaned)
-        if lang:
-            return LanguageInfo(code=lang, confidence=0.6, script=script)
-        return LanguageInfo(code="en", confidence=0.5, script="Latin")
-
-    # Determine script first — used to validate langdetect results
-    script, script_lang = _detect_script(cleaned)
-
-    # Try langdetect
-    try:
-        from langdetect import detect_langs
-        results = detect_langs(cleaned)
-        if results:
-            top = results[0]
-            lang_code = top.lang
-            confidence = top.prob
-
-            # If text is purely Latin-script (no Hangul/Kana/CJK characters),
-            # treat as English. langdetect frequently misclassifies short English
-            # text as fr, af, nl, de, etc. The LLM extraction path is designed
-            # for CJK scripts (ko, ja, zh), not Latin-script languages.
-            # However, if there ARE any non-Latin chars (e.g., Korean text with
-            # English terms like "PostgreSQL"), trust langdetect.
-            if lang_code != "en" and not _NON_LATIN_RE.search(cleaned):
-                return LanguageInfo(code="en", confidence=0.5, script="Latin")
-
-            return LanguageInfo(
-                code=lang_code,
-                confidence=round(confidence, 4),
-                script=script,
-            )
-    except ImportError:
-        pass  # langdetect not installed, fall through to Unicode fallback
-    except Exception:
-        pass  # langdetect failed (e.g., too short), fall through
-
-    # Fallback: Unicode script-based detection
-    if script_lang:
-        return LanguageInfo(code=script_lang, confidence=0.7, script=script)
-
-    # Default to English for Latin script
-    return LanguageInfo(code="en", confidence=0.5, script="Latin")
diff --git a/agents/common/llm_client.py b/agents/common/llm_client.py
deleted file mode 100644
index 3c886c1..0000000
--- a/agents/common/llm_client.py
+++ /dev/null
@@ -1,139 +0,0 @@
-"""
-Provider-agnostic LLM client for Rune pipelines.
-
-Supports Anthropic, OpenAI, and Google Gemini with a shared text-generation
-interface.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Optional
-
-logger = logging.getLogger("rune.common.llm_client")
-
-
-class LLMClient:
-    """Unified text generation client across LLM providers."""
-
-    def __init__(
-        self,
-        provider: str = "anthropic",
-        model: str = "",
-        anthropic_api_key: Optional[str] = None,
-        openai_api_key: Optional[str] = None,
-        google_api_key: Optional[str] = None,
-    ) -> None:
-        self.provider = (provider or "anthropic").lower()
-        self.model = model
-        self._client = None
-
-        if self.provider == "auto":
-            raise ValueError(
-                '"auto" provider must be resolved before creating LLMClient. '
-                'Use _resolve_provider() in the MCP server or scribe server.'
-            )
-
-        if self.provider == "anthropic":
-            if not anthropic_api_key:
-                logger.info("%s API key not provided, LLM client unavailable", self.provider)
-                return
-            try:
-                import anthropic
-
-                self._client = anthropic.Anthropic(api_key=anthropic_api_key)
-            except ImportError:
-                logger.warning("anthropic package not installed")
-            except Exception as e:
-                logger.warning("Failed to initialize Anthropic client: %s", e)
-            return
-
-        if self.provider == "openai":
-            if not openai_api_key:
-                logger.info("%s API key not provided, LLM client unavailable", self.provider)
-                return
-            try:
-                from openai import OpenAI
-
-                self._client = OpenAI(api_key=openai_api_key)
-            except ImportError:
-                logger.warning("openai package not installed")
-            except Exception as e:
-                logger.warning("Failed to initialize OpenAI client: %s", e)
-            return
-
-        if self.provider == "google":
-            if not google_api_key:
-                logger.info("%s API key not provided, LLM client unavailable", self.provider)
-                return
-            try:
-                import google.generativeai as genai
-
-                genai.configure(api_key=google_api_key)
-                self._client = genai  # Store the module, not a model instance
-                self._google_models = {}  # Cache models by system prompt hash
-            except ImportError:
-                logger.warning("google-generativeai package not installed")
-            except Exception as e:
-                logger.warning("Failed to initialize Gemini client: %s", e)
-            return
-
-        logger.warning("Unsupported LLM provider: %s", self.provider)
-
-    @property
-    def is_available(self) -> bool:
-        return self._client is not None
-
-    def generate(
-        self,
-        prompt: str,
-        *,
-        system: Optional[str] = None,
-        max_tokens: int = 512,
-        timeout: float = 30.0,
-    ) -> str:
-        if not self.is_available:
-            raise RuntimeError("LLM client is not available")
-
-        if self.provider == "anthropic":
-            response = self._client.messages.create(
-                model=self.model,
-                max_tokens=max_tokens,
-                system=system,
-                messages=[{"role": "user", "content": prompt}],
-                timeout=timeout,
-            )
-            return response.content[0].text.strip()
-
-        if self.provider == "openai":
-            messages = []
-            if system:
-                messages.append({"role": "system", "content": system})
-            messages.append({"role": "user", "content": prompt})
-            response = self._client.chat.completions.create(
-                model=self.model,
-                max_tokens=max_tokens,
-                messages=messages,
-                timeout=timeout,
-            )
-            return (response.choices[0].message.content or "").strip()
-
-        if self.provider == "google":
-            import hashlib
-
-            cache_key = hashlib.md5((system or "").encode()).hexdigest()
-            if cache_key not in self._google_models:
-                kwargs = {"model_name": self.model}
-                if system:
-                    kwargs["system_instruction"] = system
-                self._google_models[cache_key] = self._client.GenerativeModel(**kwargs)
-            model = self._google_models[cache_key]
-            response = model.generate_content(
-                prompt,
-                generation_config={"max_output_tokens": max_tokens},
-                request_options={"timeout": timeout},
-            )
-            return response.text.strip()
-
-        raise RuntimeError(f"Unsupported LLM provider: {self.provider}")
-
diff --git a/agents/common/llm_utils.py b/agents/common/llm_utils.py
deleted file mode 100644
index 8a520e0..0000000
--- a/agents/common/llm_utils.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""Shared utilities for parsing LLM responses."""
-
-from __future__ import annotations
-
-import json
-
-
-def parse_llm_json(raw: str) -> dict:
-    """Parse JSON from an LLM response, handling code fences and preamble text.
-
-    Tries in order:
-    1. Strip markdown code fences, then json.loads
-    2. Direct json.loads on the raw string
-    3. Extract substring between first '{' and last '}', then json.loads
-    4. Return empty dict
-    """
-    if not raw:
-        return {}
-
-    text = raw
-    if text.startswith("```"):
-        lines = text.split("\n")
-        lines = [l for l in lines if not l.strip().startswith("```")]
-        text = "\n".join(lines)
-
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-
-    start = raw.find("{")
-    end = raw.rfind("}") + 1
-    if start >= 0 and end > start:
-        try:
-            return json.loads(raw[start:end])
-        except json.JSONDecodeError:
-            pass
-
-    return {}
diff --git a/agents/common/pattern_cache.py b/agents/common/pattern_cache.py
deleted file mode 100644
index 473ce0f..0000000
--- a/agents/common/pattern_cache.py
+++ /dev/null
@@ -1,203 +0,0 @@
-"""
-Pattern Cache
-
-Pre-embeds trigger patterns from capture-triggers.md at startup.
-Used for on-device similarity-based decision detection.
-"""
-
-from typing import List, Dict, Tuple, Optional
-from dataclasses import dataclass
-import numpy as np
-
-from .embedding_service import EmbeddingService
-
-
-@dataclass
-class PatternEntry:
-    """A single trigger pattern with its embedding"""
-    text: str
-    category: str
-    priority: str  # "high", "medium", "low"
-    embedding: List[float]
-    domain: Optional[str] = None
-    language: Optional[str] = None  # ISO 639-1: "en", "ko", "ja"
-
-
-class PatternCache:
-    """
-    Cache of pre-embedded trigger patterns.
-
-    At startup, loads patterns from capture-triggers.md,
-    embeds them all, and stores for fast similarity lookup.
-    """
-
-    def __init__(self, embedding_service: EmbeddingService):
-        """
-        Initialize pattern cache.
-
-        Args:
-            embedding_service: EmbeddingService instance for generating embeddings
-        """
-        self._embedding = embedding_service
-        self._patterns: List[PatternEntry] = []
-        self._embeddings_matrix: Optional[np.ndarray] = None
-        self._loaded = False
-
-    @property
-    def is_loaded(self) -> bool:
-        """Check if patterns are loaded"""
-        return self._loaded
-
-    @property
-    def pattern_count(self) -> int:
-        """Number of loaded patterns"""
-        return len(self._patterns)
-
-    def load_patterns(self, patterns: List[Dict]) -> int:
-        """
-        Load and embed patterns.
-
-        Args:
-            patterns: List of pattern dicts with keys:
-                - text: Pattern text
-                - category: Category name
-                - priority: "high", "medium", or "low"
-                - domain: Optional domain classification
-
-        Returns:
-            Number of patterns loaded
-        """
-        if not patterns:
-            print("[PatternCache] Warning: No patterns to load")
-            return 0
-
-        # Extract texts for batch embedding
-        texts = [p["text"] for p in patterns]
-
-        print(f"[PatternCache] Embedding {len(texts)} patterns...")
-        embeddings = self._embedding.embed(texts)
-
-        # Create PatternEntry objects
-        self._patterns = [
-            PatternEntry(
-                text=p["text"],
-                category=p.get("category", "general"),
-                priority=p.get("priority", "medium"),
-                domain=p.get("domain"),
-                embedding=embeddings[i],
-                language=p.get("language"),
-            )
-            for i, p in enumerate(patterns)
-        ]
-
-        # Create embeddings matrix for fast batch similarity
-        self._embeddings_matrix = np.array([p.embedding for p in self._patterns])
-        self._loaded = True
-
-        print(f"[PatternCache] Loaded {len(self._patterns)} patterns")
-        return len(self._patterns)
-
-    def find_best_match(
-        self,
-        text: str,
-        threshold: float = 0.7
-    ) -> Tuple[Optional[PatternEntry], float]:
-        """
-        Find the best matching pattern for input text.
-
-        Args:
-            text: Input text to match
-            threshold: Minimum similarity threshold
-
-        Returns:
-            Tuple of (best_match, score) or (None, best_score) if below threshold
-        """
-        if not self._loaded:
-            raise RuntimeError("Patterns not loaded. Call load_patterns() first.")
-
-        if not text.strip():
-            return (None, 0.0)
-
-        # Embed input text
-        text_embedding = self._embedding.embed_single(text)
-        text_vec = np.array(text_embedding)
-
-        # Compute similarities with all patterns (batch operation)
-        similarities = np.dot(self._embeddings_matrix, text_vec)
-
-        # Find best match
-        best_idx = int(np.argmax(similarities))
-        best_score = float(similarities[best_idx])
-
-        if best_score >= threshold:
-            return (self._patterns[best_idx], best_score)
-
-        return (None, best_score)
-
-    def find_top_matches(
-        self,
-        text: str,
-        top_k: int = 5,
-        threshold: float = 0.5
-    ) -> List[Tuple[PatternEntry, float]]:
-        """
-        Find top-k matching patterns for input text.
-
-        Args:
-            text: Input text to match
-            top_k: Number of top matches to return
-            threshold: Minimum similarity threshold
-
-        Returns:
-            List of (pattern, score) tuples, sorted by score descending
-        """
-        if not self._loaded:
-            raise RuntimeError("Patterns not loaded. Call load_patterns() first.")
-
-        if not text.strip():
-            return []
-
-        # Embed input text
-        text_embedding = self._embedding.embed_single(text)
-        text_vec = np.array(text_embedding)
-
-        # Compute similarities with all patterns
-        similarities = np.dot(self._embeddings_matrix, text_vec)
-
-        # Get top-k indices
-        if len(similarities) <= top_k:
-            top_indices = np.argsort(similarities)[::-1]
-        else:
-            top_indices = np.argpartition(similarities, -top_k)[-top_k:]
-            top_indices = top_indices[np.argsort(similarities[top_indices])[::-1]]
-
-        # Filter by threshold and create results
-        results = []
-        for idx in top_indices:
-            score = float(similarities[idx])
-            if score >= threshold:
-                results.append((self._patterns[idx], score))
-
-        return results
-
-    def get_patterns_by_category(self, category: str) -> List[PatternEntry]:
-        """Get all patterns in a category"""
-        return [p for p in self._patterns if p.category.lower() == category.lower()]
-
-    def get_patterns_by_priority(self, priority: str) -> List[PatternEntry]:
-        """Get all patterns with given priority"""
-        return [p for p in self._patterns if p.priority.lower() == priority.lower()]
-
-    def get_high_priority_patterns(self) -> List[PatternEntry]:
-        """Get all high-priority patterns"""
-        return self.get_patterns_by_priority("high")
-
-    def categories(self) -> List[str]:
-        """Get list of unique categories"""
-        return list(set(p.category for p in self._patterns))
-
-    def clear(self) -> None:
-        """Clear all loaded patterns"""
-        self._patterns = []
-        self._embeddings_matrix = None
-        self._loaded = False
diff --git a/agents/common/schemas/__init__.py b/agents/common/schemas/__init__.py
deleted file mode 100644
index d74efc2..0000000
--- a/agents/common/schemas/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-Rune Decision Record Schemas
-
-Schema v2 for organizational memory with evidence-based reasoning.
-"""
-
-from .decision_record import (
-    DecisionRecord,
-    DecisionDetail,
-    Context,
-    Why,
-    Evidence,
-    SourceRef,
-    SourceType,
-    Assumption,
-    Risk,
-    Quality,
-    Payload,
-    Domain,
-    Sensitivity,
-    Status,
-    Certainty,
-    ReviewState,
-    generate_record_id,
-    generate_group_id,
-)
-from .templates import render_payload_text, render_display_text, PAYLOAD_TEMPLATE, PAYLOAD_HEADERS
-
-__all__ = [
-    "DecisionRecord",
-    "DecisionDetail",
-    "Context",
-    "Why",
-    "Evidence",
-    "SourceRef",
-    "SourceType",
-    "Assumption",
-    "Risk",
-    "Quality",
-    "Payload",
-    "Domain",
-    "Sensitivity",
-    "Status",
-    "Certainty",
-    "ReviewState",
-    "generate_record_id",
-    "generate_group_id",
-    "render_payload_text",
-    "render_display_text",
-    "PAYLOAD_TEMPLATE",
-    "PAYLOAD_HEADERS",
-]
diff --git a/agents/common/schemas/decision_record.py b/agents/common/schemas/decision_record.py
deleted file mode 100644
index 88ce320..0000000
--- a/agents/common/schemas/decision_record.py
+++ /dev/null
@@ -1,259 +0,0 @@
-"""
-Decision Record Schema v2
-
-Core principle: Memory items always have a "text payload" that can fully reproduce the context.
-The embedding is generated from that text payload.
-"Why" cannot be written definitively without evidence.
-"""
-
-from datetime import datetime, timezone
-from typing import List, Optional, Literal
-from pydantic import BaseModel, Field
-from enum import Enum
-
-
-# ============================================================================
-# Enums
-# ============================================================================
-
-class Domain(str, Enum):
-    """Decision domain categories"""
-    ARCHITECTURE = "architecture"
-    SECURITY = "security"
-    PRODUCT = "product"
-    EXEC = "exec"
-    OPS = "ops"
-    DESIGN = "design"
-    DATA = "data"
-    HR = "hr"
-    MARKETING = "marketing"
-    INCIDENT = "incident"
-    DEBUGGING = "debugging"
-    QA = "qa"
-    LEGAL = "legal"
-    FINANCE = "finance"
-    SALES = "sales"
-    CUSTOMER_SUCCESS = "customer_success"
-    RESEARCH = "research"
-    RISK = "risk"
-    GENERAL = "general"
-
-
-class Sensitivity(str, Enum):
-    """Data sensitivity levels"""
-    PUBLIC = "public"
-    INTERNAL = "internal"
-    RESTRICTED = "restricted"
-
-
-class Status(str, Enum):
-    """Decision status"""
-    PROPOSED = "proposed"
-    ACCEPTED = "accepted"
-    SUPERSEDED = "superseded"
-    REVERTED = "reverted"
-
-
-class Certainty(str, Enum):
-    """Evidence certainty level for 'Why'"""
-    SUPPORTED = "supported"
-    PARTIALLY_SUPPORTED = "partially_supported"
-    UNKNOWN = "unknown"
-
-
-class ReviewState(str, Enum):
-    """Human review state"""
-    UNREVIEWED = "unreviewed"
-    APPROVED = "approved"
-    EDITED = "edited"
-    REJECTED = "rejected"
-
-
-class SourceType(str, Enum):
-    """Source types for evidence"""
-    SLACK = "slack"
-    MEETING = "meeting"
-    DOC = "doc"
-    GITHUB = "github"
-    EMAIL = "email"
-    NOTION = "notion"
-    OTHER = "other"
-
-
-# ============================================================================
-# Sub-models
-# ============================================================================
-
-class SourceRef(BaseModel):
-    """Reference to the source of evidence"""
-    type: SourceType
-    url: Optional[str] = None
-    pointer: Optional[str] = None  # e.g., "channel:#arch thread_ts:123" or "timestamp:00:32:14"
-
-
-class Evidence(BaseModel):
-    """Evidence supporting a claim with direct quote"""
-    claim: str = Field(..., description="What is being claimed")
-    quote: str = Field(..., description="Direct quote (1-2 sentences)")
-    source: SourceRef
-
-
-class Assumption(BaseModel):
-    """Assumption with confidence level"""
-    assumption: str
-    confidence: float = Field(ge=0.0, le=1.0, default=0.5)
-
-
-class Risk(BaseModel):
-    """Risk with mitigation strategy"""
-    risk: str
-    mitigation: Optional[str] = None
-
-
-class DecisionDetail(BaseModel):
-    """What was decided, by whom, when, where"""
-    what: str = Field(..., description="The actual decision statement")
-    who: List[str] = Field(default_factory=list, description="Participants (role:cto, user:alice)")
-    where: str = Field(default="", description="Channel/meeting where decided")
-    when: str = Field(default="", description="Date of decision (YYYY-MM-DD)")
-
-
-class Context(BaseModel):
-    """Context surrounding the decision"""
-    problem: str = Field(default="", description="Problem being solved")
-    scope: Optional[str] = None
-    constraints: List[str] = Field(default_factory=list)
-    alternatives: List[str] = Field(default_factory=list)
-    chosen: str = Field(default="", description="Chosen alternative")
-    trade_offs: List[str] = Field(default_factory=list)
-    assumptions: List[Assumption] = Field(default_factory=list)
-    risks: List[Risk] = Field(default_factory=list)
-
-
-class Why(BaseModel):
-    """
-    Rationale for the decision.
-
-    CRITICAL RULE: certainty cannot be 'supported' without evidence.
-    If evidence is missing, certainty MUST be 'unknown'.
-    """
-    rationale_summary: str = Field(default="", description="Summary of why this decision was made")
-    certainty: Certainty = Field(default=Certainty.UNKNOWN)
-    missing_info: List[str] = Field(default_factory=list, description="What information is missing")
-
-
-class Quality(BaseModel):
-    """Quality metrics for the capture"""
-    scribe_confidence: float = Field(ge=0.0, le=1.0, default=0.5)
-    review_state: ReviewState = Field(default=ReviewState.UNREVIEWED)
-    reviewed_by: Optional[str] = None
-    review_notes: Optional[str] = None
-
-
-class Payload(BaseModel):
-    """
-    The normalized text payload for embedding.
-    This is the SINGLE SOURCE OF TRUTH for memory reproduction.
-    """
-    format: Literal["markdown"] = "markdown"
-    text: str = Field(default="", description="Markdown text for embedding")
-
-
-# ============================================================================
-# Main Schema
-# ============================================================================
-
-class DecisionRecord(BaseModel):
-    """
-    Decision Record Schema v2.1
-
-    Core principle: payload.text must be able to fully reproduce the memory.
-    Embedding target: reusable_insight (schema 2.1+), payload.text (fallback).
-    """
-    schema_version: str = Field(default="2.1")
-    id: str = Field(..., description="Unique ID: dec_YYYY-MM-DD_domain_slug")
-    type: Literal["decision_record"] = "decision_record"
-
-    domain: Domain = Field(default=Domain.GENERAL)
-    sensitivity: Sensitivity = Field(default=Sensitivity.INTERNAL)
-    status: Status = Field(default=Status.PROPOSED)
-    superseded_by: Optional[str] = None
-    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
-
-    title: str = Field(..., description="Short title for the decision")
-    decision: DecisionDetail
-    context: Context = Field(default_factory=Context)
-    why: Why = Field(default_factory=Why)
-    evidence: List[Evidence] = Field(default_factory=list)
-
-    links: List[dict] = Field(default_factory=list, description="Related links (ADR, PR, etc.)")
-    tags: List[str] = Field(default_factory=list)
-
-    # Group fields — for linked long-term memory
-    group_id: Optional[str] = Field(default=None, description="Shared ID linking all records in a group")
-    group_type: Optional[str] = Field(default=None, description="Group type: 'phase_chain' (sequential reasoning) or 'bundle' (detail facets)")
-    phase_seq: Optional[int] = Field(default=None, description="0-indexed position within the group")
-    phase_total: Optional[int] = Field(default=None, description="Total number of records in the group")
-
-    # Content preservation
-    original_text: Optional[str] = Field(default=None, description="Original conversation text before extraction")
-    group_summary: Optional[str] = Field(default=None, description="1-line topic summary shared across all phases for semantic anchoring")
-
-    # PRIMARY embedding target (schema 2.1+)
-    reusable_insight: str = Field(
-        default="",
-        description=(
-            "Dense natural-language paragraph capturing the core knowledge. "
-            "PRIMARY text embedded in enVector for semantic search. "
-            "256-768 tokens, no markdown, self-contained, causality-preserving."
-        ),
-    )
-
-    quality: Quality = Field(default_factory=Quality)
-    payload: Payload = Field(default_factory=Payload)
-
-    def validate_evidence_certainty(self) -> bool:
-        """
-        Validate that certainty is appropriate given evidence.
-        Returns True if valid, False if certainty should be downgraded.
-        """
-        has_quotes = any(e.quote for e in self.evidence)
-
-        if self.why.certainty == Certainty.SUPPORTED and not has_quotes:
-            return False
-        return True
-
-    def ensure_evidence_certainty_consistency(self) -> None:
-        """
-        Enforce: Why cannot be 'supported' without evidence quotes.
-        Mutates the record to fix inconsistencies.
-        """
-        has_quotes = any(e.quote for e in self.evidence)
-
-        if not has_quotes:
-            if self.why.certainty == Certainty.SUPPORTED:
-                self.why.certainty = Certainty.UNKNOWN
-                if "No direct quotes found in evidence" not in self.why.missing_info:
-                    self.why.missing_info.append("No direct quotes found in evidence")
-
-        # If no evidence at all, status should be proposed
-        if not self.evidence:
-            if self.status == Status.ACCEPTED:
-                self.status = Status.PROPOSED
-
-
-def generate_record_id(timestamp: datetime, domain: Domain, title: str) -> str:
-    """Generate a unique ID for a decision record"""
-    date_str = timestamp.strftime("%Y-%m-%d")
-    # Create slug from title (first 3 words, lowercase, underscored)
-    words = title.lower().split()[:3]
-    slug = "_".join(w for w in words if w.isalnum() or w.replace("_", "").isalnum())
-    return f"dec_{date_str}_{domain.value}_{slug}"
-
-
-def generate_group_id(timestamp: datetime, domain: Domain, title: str) -> str:
-    """Generate a shared group ID for related records (phase_chain or bundle)"""
-    date_str = timestamp.strftime("%Y-%m-%d")
-    words = title.lower().split()[:3]
-    slug = "_".join(w for w in words if w.isalnum() or w.replace("_", "").isalnum())
-    return f"grp_{date_str}_{domain.value}_{slug}"
diff --git a/agents/common/schemas/embedding.py b/agents/common/schemas/embedding.py
deleted file mode 100644
index 2d4e853..0000000
--- a/agents/common/schemas/embedding.py
+++ /dev/null
@@ -1,56 +0,0 @@
-"""
-Embedding text selection and novelty classification for DecisionRecords.
-
-Schema 2.1+ uses reusable_insight as the primary embedding target.
-Schema 2.0 falls back to payload.text.
-"""
-
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from .decision_record import DecisionRecord
-
-
-# Novelty thresholds (Memory-as-Filter)
-# Calibrated for Qwen3-Embedding-0.6B (1024dim) via benchmark 2026-04-08
-NOVELTY_THRESHOLD_NOVEL = 0.4
-NOVELTY_THRESHOLD_RELATED = 0.7
-NOVELTY_THRESHOLD_NEAR_DUPLICATE = 0.93
-
-
-def embedding_text_for_record(record: "DecisionRecord") -> str:
-    """Select the text to embed in enVector.
-
-    Schema 2.1+: use reusable_insight (dense NL gist).
-    Schema 2.0 fallback: use payload.text (verbose markdown).
-    """
-    insight = getattr(record, "reusable_insight", "")
-    if insight and insight.strip():
-        return insight.strip()
-    return record.payload.text
-
-
-def classify_novelty(
-    max_similarity: float,
-    threshold_novel: float = NOVELTY_THRESHOLD_NOVEL,
-    threshold_related: float = NOVELTY_THRESHOLD_RELATED,
-    threshold_near_duplicate: float = NOVELTY_THRESHOLD_NEAR_DUPLICATE,
-) -> dict:
-    """Classify capture novelty based on similarity to existing memory.
-
-    Returns dict with 'score' (0-1, higher=more novel) and 'class'.
-    Classes (annotation-only except near_duplicate):
-      - near_duplicate (>= 0.93): blocks capture
-      - related (>= 0.7): annotation only
-      - evolution (>= 0.4): annotation only
-      - novel (< 0.4): annotation only
-    """
-    novelty_score = 1.0 - max_similarity
-    if max_similarity >= threshold_near_duplicate:
-        return {"class": "near_duplicate", "score": round(novelty_score, 4)}
-    elif max_similarity >= threshold_related:
-        return {"class": "related", "score": round(novelty_score, 4)}
-    elif max_similarity >= threshold_novel:
-        return {"class": "evolution", "score": round(novelty_score, 4)}
-    else:
-        return {"class": "novel", "score": round(novelty_score, 4)}
diff --git a/agents/common/schemas/templates.py b/agents/common/schemas/templates.py
deleted file mode 100644
index 70ea387..0000000
--- a/agents/common/schemas/templates.py
+++ /dev/null
@@ -1,363 +0,0 @@
-"""
-Payload Text Templates
-
-Renders DecisionRecord to Markdown format for embedding.
-The payload.text is the SINGLE SOURCE OF TRUTH for memory reproduction.
-"""
-
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from .decision_record import DecisionRecord
-
-
-PAYLOAD_TEMPLATE = """# Decision Record: {title}
-ID: {id}
-Status: {status} | Sensitivity: {sensitivity} | Domain: {domain}
-When/Where: {when} | {where}
-
-## Decision
-{what}
-
-## Problem
-{problem}
-
-## Alternatives Considered
-{alternatives}
-
-## Why (Rationale)
-{rationale_summary}
-Certainty: {certainty}
-
-## Trade-offs
-{trade_offs}
-
-## Assumptions
-{assumptions}
-
-## Risks & Mitigations
-{risks}
-
-## Evidence (Quotes)
-{evidence_block}
-
-## Links
-{links}
-
-## Tags
-{tags}
-"""
-
-
-def _format_alternatives(alternatives: list, chosen: str) -> str:
-    """Format alternatives list with chosen marker"""
-    if not alternatives:
-        return "- (none documented)"
-
-    lines = []
-    for alt in alternatives:
-        if alt.lower() == chosen.lower() or chosen.lower() in alt.lower():
-            lines.append(f"- {alt} (chosen)")
-        else:
-            lines.append(f"- {alt}")
-    return "\n".join(lines)
-
-
-def _format_trade_offs(trade_offs: list) -> str:
-    """Format trade-offs list"""
-    if not trade_offs:
-        return "- (none documented)"
-    return "\n".join(f"- {t}" for t in trade_offs)
-
-
-def _format_assumptions(assumptions: list) -> str:
-    """Format assumptions with confidence"""
-    if not assumptions:
-        return "- (none documented)"
-
-    lines = []
-    for a in assumptions:
-        conf = getattr(a, 'confidence', 0.5)
-        lines.append(f"- {a.assumption} (confidence: {conf:.1f})")
-    return "\n".join(lines)
-
-
-def _format_risks(risks: list) -> str:
-    """Format risks with mitigations"""
-    if not risks:
-        return "- (none documented)"
-
-    lines = []
-    for r in risks:
-        mitigation = getattr(r, 'mitigation', None) or "TBD"
-        lines.append(f"- Risk: {r.risk}\n  Mitigation: {mitigation}")
-    return "\n".join(lines)
-
-
-def _format_evidence(evidence: list) -> str:
-    """Format evidence with quotes and sources"""
-    if not evidence:
-        return "(no evidence recorded)"
-
-    lines = []
-    for i, e in enumerate(evidence, 1):
-        source_type = e.source.type.value if hasattr(e.source.type, 'value') else str(e.source.type)
-        source_url = e.source.url or "(no url)"
-        source_pointer = e.source.pointer or ""
-
-        lines.append(f"{i}) Claim: {e.claim}")
-        lines.append(f'   Quote: "{e.quote}"')
-        lines.append(f"   Source: {source_type} {source_url}")
-        if source_pointer:
-            lines.append(f"   Pointer: {source_pointer}")
-        lines.append("")
-
-    return "\n".join(lines).strip()
-
-
-def _format_links(links: list) -> str:
-    """Format related links"""
-    if not links:
-        return "- (none)"
-
-    lines = []
-    for link in links:
-        rel = link.get('rel', 'link')
-        url = link.get('url', '')
-        lines.append(f"- {rel}: {url}")
-    return "\n".join(lines)
-
-
-def _format_tags(tags: list) -> str:
-    """Format tags as comma-separated"""
-    if not tags:
-        return "(none)"
-    return ", ".join(tags)
-
-
-def render_payload_text(record: "DecisionRecord") -> str:
-    """
-    Render a DecisionRecord to payload.text (Markdown).
-
-    This text is used for:
-    1. Memory reproduction (human-readable full context)
-    2. Display in recall results
-
-    NOTE (schema 2.1+): Embedding generation now uses
-    record.reusable_insight instead of this text.
-    For schema 2.0 records without reusable_insight,
-    this text is still used as embedding fallback.
-    """
-    # Extract values with safe defaults
-    domain = record.domain.value if hasattr(record.domain, 'value') else str(record.domain)
-    sensitivity = record.sensitivity.value if hasattr(record.sensitivity, 'value') else str(record.sensitivity)
-    status = record.status.value if hasattr(record.status, 'value') else str(record.status)
-    certainty = record.why.certainty.value if hasattr(record.why.certainty, 'value') else str(record.why.certainty)
-
-    # Format complex fields
-    alternatives = _format_alternatives(
-        record.context.alternatives,
-        record.context.chosen
-    )
-    trade_offs = _format_trade_offs(record.context.trade_offs)
-    assumptions = _format_assumptions(record.context.assumptions)
-    risks = _format_risks(record.context.risks)
-    evidence_block = _format_evidence(record.evidence)
-    links = _format_links(record.links)
-    tags = _format_tags(record.tags)
-
-    # Build rationale with missing info if applicable
-    rationale = record.why.rationale_summary or "(no rationale documented)"
-    if record.why.missing_info:
-        rationale += "\n\nMissing Information:\n" + "\n".join(f"- {m}" for m in record.why.missing_info)
-
-    # Group info (phase_chain or bundle)
-    phase_line = ""
-    if getattr(record, 'group_id', None):
-        seq = (getattr(record, 'phase_seq', None) or 0) + 1
-        total = getattr(record, 'phase_total', None) or "?"
-        gtype = getattr(record, 'group_type', None) or "phase_chain"
-        phase_line = f"\nPart: {seq} of {total} | Type: {gtype} | Group: {record.group_id}"
-
-    # Render template
-    text = PAYLOAD_TEMPLATE.format(
-        title=record.title,
-        id=record.id,
-        status=status,
-        sensitivity=sensitivity,
-        domain=domain,
-        when=record.decision.when or "(unknown)",
-        where=record.decision.where or "(unknown)",
-        what=record.decision.what,
-        problem=record.context.problem or "(not documented)",
-        alternatives=alternatives,
-        rationale_summary=rationale,
-        certainty=certainty,
-        trade_offs=trade_offs,
-        assumptions=assumptions,
-        risks=risks,
-        evidence_block=evidence_block,
-        links=links,
-        tags=tags,
-    )
-
-    # Insert phase line and group summary after ID line
-    if phase_line:
-        lines = text.split("\n")
-        for i, line in enumerate(lines):
-            if line.startswith("ID: "):
-                insert_pos = i + 1
-                lines.insert(insert_pos, phase_line.lstrip("\n"))
-                # Inject group summary as semantic anchor for all phases
-                group_summary = getattr(record, 'group_summary', None)
-                if group_summary:
-                    lines.insert(insert_pos + 1, f"Group Summary: {group_summary}")
-                break
-        text = "\n".join(lines)
-
-    # Clean up multiple blank lines
-    while "\n\n\n" in text:
-        text = text.replace("\n\n\n", "\n\n")
-
-    return text.strip()
-
-
-def render_compact_payload(record: "DecisionRecord") -> str:
-    """
-    Render a compact version for search result previews.
-    """
-    domain = record.domain.value if hasattr(record.domain, 'value') else str(record.domain)
-    certainty = record.why.certainty.value if hasattr(record.why.certainty, 'value') else str(record.why.certainty)
-
-    return f"""**{record.title}** ({record.id})
-Domain: {domain} | Certainty: {certainty}
-
-{record.decision.what}
-
-Why: {record.why.rationale_summary or '(no rationale)'}
-"""
-
-
-# Localized headers for display rendering (not for payload.text / embedding)
-PAYLOAD_HEADERS = {
-    "en": {
-        "decision_record": "Decision Record",
-        "decision": "Decision",
-        "problem": "Problem",
-        "alternatives": "Alternatives Considered",
-        "rationale": "Why (Rationale)",
-        "certainty": "Certainty",
-        "trade_offs": "Trade-offs",
-        "assumptions": "Assumptions",
-        "risks": "Risks & Mitigations",
-        "evidence": "Evidence (Quotes)",
-        "links": "Links",
-        "tags": "Tags",
-    },
-    "ko": {
-        "decision_record": "결정 기록",
-        "decision": "결정 사항",
-        "problem": "문제",
-        "alternatives": "검토한 대안",
-        "rationale": "근거 (이유)",
-        "certainty": "확실성",
-        "trade_offs": "트레이드오프",
-        "assumptions": "가정",
-        "risks": "리스크 및 대응",
-        "evidence": "증거 (인용)",
-        "links": "링크",
-        "tags": "태그",
-    },
-    "ja": {
-        "decision_record": "決定記録",
-        "decision": "決定事項",
-        "problem": "問題",
-        "alternatives": "検討した代替案",
-        "rationale": "根拠（理由）",
-        "certainty": "確実性",
-        "trade_offs": "トレードオフ",
-        "assumptions": "仮定",
-        "risks": "リスクと対策",
-        "evidence": "証拠（引用）",
-        "links": "リンク",
-        "tags": "タグ",
-    },
-}
-
-
-def render_display_text(record: "DecisionRecord", language: str = "en") -> str:
-    """Render a DecisionRecord with localized headers for user display.
-
-    Unlike render_payload_text() (which is always English for embedding consistency),
-    this function uses localized section headers for human-readable presentation.
-
-    Args:
-        record: DecisionRecord to render
-        language: ISO 639-1 language code
-
-    Returns:
-        Localized markdown text for display
-    """
-    headers = PAYLOAD_HEADERS.get(language, PAYLOAD_HEADERS["en"])
-
-    domain = record.domain.value if hasattr(record.domain, 'value') else str(record.domain)
-    sensitivity = record.sensitivity.value if hasattr(record.sensitivity, 'value') else str(record.sensitivity)
-    status = record.status.value if hasattr(record.status, 'value') else str(record.status)
-    certainty = record.why.certainty.value if hasattr(record.why.certainty, 'value') else str(record.why.certainty)
-
-    alternatives = _format_alternatives(
-        record.context.alternatives,
-        record.context.chosen
-    )
-    trade_offs = _format_trade_offs(record.context.trade_offs)
-    assumptions = _format_assumptions(record.context.assumptions)
-    risks = _format_risks(record.context.risks)
-    evidence_block = _format_evidence(record.evidence)
-    links = _format_links(record.links)
-    tags = _format_tags(record.tags)
-
-    rationale = record.why.rationale_summary or "(no rationale documented)"
-    if record.why.missing_info:
-        rationale += "\n\nMissing Information:\n" + "\n".join(f"- {m}" for m in record.why.missing_info)
-
-    text = f"""# {headers['decision_record']}: {record.title}
-ID: {record.id}
-Status: {status} | Sensitivity: {sensitivity} | Domain: {domain}
-When/Where: {record.decision.when or '(unknown)'} | {record.decision.where or '(unknown)'}
-
-## {headers['decision']}
-{record.decision.what}
-
-## {headers['problem']}
-{record.context.problem or '(not documented)'}
-
-## {headers['alternatives']}
-{alternatives}
-
-## {headers['rationale']}
-{rationale}
-{headers['certainty']}: {certainty}
-
-## {headers['trade_offs']}
-{trade_offs}
-
-## {headers['assumptions']}
-{assumptions}
-
-## {headers['risks']}
-{risks}
-
-## {headers['evidence']}
-{evidence_block}
-
-## {headers['links']}
-{links}
-
-## {headers['tags']}
-{tags}
-"""
-
-    while "\n\n\n" in text:
-        text = text.replace("\n\n\n", "\n\n")
-
-    return text.strip()
diff --git a/agents/retriever/__init__.py b/agents/retriever/__init__.py
deleted file mode 100644
index 076d150..0000000
--- a/agents/retriever/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-Retriever Agent - Organizational Context Retrieval
-
-Searches organizational memory and synthesizes answers using LLM.
-
-Key Components:
-- QueryProcessor: Parses and expands user queries
-- Searcher: Searches enVector for relevant context
-- Synthesizer: LLM-based answer synthesis from payload.text
-
-Pipeline:
-1. Parse user query (intent, entities, time scope)
-2. Search enVector for relevant Decision Records
-3. Extract payload.text from results
-4. Synthesize answer with LLM (respecting certainty levels)
-"""
-
-from .query_processor import QueryProcessor, ParsedQuery
-from .searcher import Searcher, SearchResult
-from .synthesizer import Synthesizer, SynthesizedAnswer
-
-__all__ = [
-    "QueryProcessor",
-    "ParsedQuery",
-    "Searcher",
-    "SearchResult",
-    "Synthesizer",
-    "SynthesizedAnswer",
-]
diff --git a/agents/retriever/query_processor.py b/agents/retriever/query_processor.py
deleted file mode 100644
index 81c676f..0000000
--- a/agents/retriever/query_processor.py
+++ /dev/null
@@ -1,436 +0,0 @@
-"""
-Query Processor
-
-Parses and analyzes user queries to understand intent and extract entities.
-Uses patterns from retrieval-patterns.md for intent classification.
-Supports multilingual queries via LLM-based intent classification + translation.
-"""
-
-import json
-import logging
-import re
-from typing import List, Optional
-from dataclasses import dataclass, field
-from enum import Enum
-
-from ..common.language import LanguageInfo, detect_language
-from ..common.llm_client import LLMClient
-from ..common.llm_utils import parse_llm_json
-
-logger = logging.getLogger("rune.retriever.query")
-
-
-class QueryIntent(str, Enum):
-    """Types of query intent"""
-    DECISION_RATIONALE = "decision_rationale"  # "Why did we choose X?"
-    FEATURE_HISTORY = "feature_history"  # "Have customers asked for X?"
-    PATTERN_LOOKUP = "pattern_lookup"  # "How do we handle X?"
-    TECHNICAL_CONTEXT = "technical_context"  # "What's our architecture for X?"
-    SECURITY_COMPLIANCE = "security_compliance"  # "What are the security requirements?"
-    HISTORICAL_CONTEXT = "historical_context"  # "When did we decide X?"
-    ATTRIBUTION = "attribution"  # "Who decided on X?"
-    GENERAL = "general"  # Catch-all
-
-
-class TimeScope(str, Enum):
-    """Time scope for queries"""
-    LAST_WEEK = "last_week"
-    LAST_MONTH = "last_month"
-    LAST_QUARTER = "last_quarter"
-    LAST_YEAR = "last_year"
-    ALL_TIME = "all_time"
-
-
-@dataclass
-class ParsedQuery:
-    """Parsed representation of a user query"""
-    original: str
-    cleaned: str
-    intent: QueryIntent
-    time_scope: TimeScope = TimeScope.ALL_TIME
-    entities: List[str] = field(default_factory=list)
-    keywords: List[str] = field(default_factory=list)
-    expanded_queries: List[str] = field(default_factory=list)
-    language: Optional[LanguageInfo] = None
-
-
-class QueryProcessor:
-    """
-    Processes user queries for organizational memory search.
-
-    Responsibilities:
-    1. Clean and normalize query text
-    2. Detect query intent (why, how, what, when, who)
-    3. Extract entities and keywords
-    4. Determine time scope
-    5. Generate query expansions for better recall
-    """
-
-    # Intent detection patterns (from retrieval-patterns.md)
-    INTENT_PATTERNS = {
-        QueryIntent.DECISION_RATIONALE: [
-            r"why did we (choose|decide|go with|select|pick)",
-            r"what was the (reasoning|rationale|logic|thinking)",
-            r"why .+ over .+",
-            r"what were the (reasons|factors)",
-            r"why (not|didn't we)",
-            r"reasoning behind",
-        ],
-        QueryIntent.FEATURE_HISTORY: [
-            r"(have|did) (customers?|users?) (asked|requested|wanted)",
-            r"feature request",
-            r"why did we (reject|say no|decline)",
-            r"(how many|which) customers",
-            r"customer feedback (on|about)",
-        ],
-        QueryIntent.PATTERN_LOOKUP: [
-            r"how do we (handle|deal with|approach|manage)",
-            r"what'?s our (approach|process|standard|convention)",
-            r"is there (an?|existing) (pattern|standard|convention)",
-            r"what'?s the (best practice|recommended way)",
-            r"how should (we|I)",
-        ],
-        QueryIntent.TECHNICAL_CONTEXT: [
-            r"what'?s our (architecture|design|system) for",
-            r"how (does|is) .+ (implemented|built|designed)",
-            r"(explain|describe) (the|our) .+ (system|architecture|design)",
-            r"technical (details|overview) (of|for)",
-        ],
-        QueryIntent.SECURITY_COMPLIANCE: [
-            r"(security|compliance) (requirements?|considerations?)",
-            r"what (security|privacy) (measures|controls)",
-            r"(gdpr|hipaa|sox|pci) (requirements?|compliance)",
-            r"audit (requirements?|trail)",
-        ],
-        QueryIntent.HISTORICAL_CONTEXT: [
-            r"when did we (decide|choose|implement|launch)",
-            r"(history|timeline) of",
-            r"(have|did) we (ever|previously)",
-            r"how long (have|has) .+ been",
-        ],
-        QueryIntent.ATTRIBUTION: [
-            r"who (decided|chose|approved|owns)",
-            r"which (team|person|group) (is responsible|decided|owns)",
-            r"(owner|maintainer) of",
-        ],
-    }
-
-    # Time scope patterns
-    TIME_PATTERNS = {
-        TimeScope.LAST_WEEK: [r"last week", r"this week", r"past week", r"7 days"],
-        TimeScope.LAST_MONTH: [r"last month", r"this month", r"past month", r"30 days"],
-        TimeScope.LAST_QUARTER: [r"last quarter", r"this quarter", r"Q[1-4]", r"past 3 months"],
-        TimeScope.LAST_YEAR: [r"last year", r"this year", r"20\d{2}", r"past year"],
-    }
-
-    # Stop words to filter from keywords
-    STOP_WORDS = {
-        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
-        "have", "has", "had", "do", "does", "did", "will", "would", "could",
-        "should", "may", "might", "must", "shall", "can", "need", "dare",
-        "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
-        "from", "up", "about", "into", "over", "after", "we", "our", "us",
-        "i", "me", "my", "you", "your", "it", "its", "they", "them", "their",
-        "this", "that", "these", "those", "what", "which", "who", "whom",
-        "when", "where", "why", "how", "and", "or", "but", "if", "because",
-        "as", "until", "while", "although", "though", "even", "just", "also",
-    }
-
-    # LLM prompt for multilingual query parsing
-    QUERY_PARSE_PROMPT = """Analyze this user query and extract structured information.
-The query may be in any language. Translate all outputs to English.
-
-Respond with a valid JSON object:
-{{
-    "intent": one of ["decision_rationale", "feature_history", "pattern_lookup", "technical_context", "security_compliance", "historical_context", "attribution", "general"],
-    "english_query": "the query translated to English",
-    "entities": ["list", "of", "named", "entities"],
-    "keywords": ["important", "keywords", "in", "english"],
-    "time_scope": one of ["last_week", "last_month", "last_quarter", "last_year", "all_time"]
-}}
-
-Query:
-<user_query>
-{query}
-</user_query>
-
-JSON:"""
-
-    def __init__(
-        self,
-        llm_provider: str = "anthropic",
-        anthropic_api_key: Optional[str] = None,
-        openai_api_key: Optional[str] = None,
-        google_api_key: Optional[str] = None,
-        model: str = "claude-sonnet-4-20250514",
-    ):
-        """Initialize query processor.
-
-        Args:
-            llm_provider: LLM provider to use
-            anthropic_api_key: Optional API key for Anthropic
-            openai_api_key: Optional API key for OpenAI
-            google_api_key: Optional API key for Gemini
-            model: Provider model to use
-        """
-        self._llm = None
-        self._model = model
-
-        self._llm = LLMClient(
-            provider=llm_provider,
-            model=model,
-            anthropic_api_key=anthropic_api_key,
-            openai_api_key=openai_api_key,
-            google_api_key=google_api_key,
-        )
-
-    def parse(self, query: str) -> ParsedQuery:
-        """
-        Parse a user query into structured form.
-
-        Args:
-            query: Raw user query string
-
-        Returns:
-            ParsedQuery with intent, entities, and expansions
-        """
-        language = detect_language(query)
-
-        if language.is_english or not self._llm or not self._llm.is_available:
-            # English path: existing regex (unchanged)
-            return self._parse_english(query, language)
-        else:
-            # Non-English path: LLM classification + translation
-            return self._parse_multilingual(query, language)
-
-    def _parse_english(self, query: str, language: Optional[LanguageInfo] = None) -> ParsedQuery:
-        """Parse English query using regex patterns (original logic)."""
-        # Clean query
-        cleaned = self._clean_query(query)
-
-        # Detect intent
-        intent = self._detect_intent(cleaned)
-
-        # Detect time scope
-        time_scope = self._detect_time_scope(cleaned)
-
-        # Extract entities
-        entities = self._extract_entities(query)
-
-        # Extract keywords
-        keywords = self._extract_keywords(cleaned)
-
-        # Generate query expansions
-        expanded = self._generate_expansions(cleaned, intent, entities)
-
-        return ParsedQuery(
-            original=query,
-            cleaned=cleaned,
-            intent=intent,
-            time_scope=time_scope,
-            entities=entities,
-            keywords=keywords,
-            expanded_queries=expanded,
-            language=language,
-        )
-
-    def _parse_multilingual(self, query: str, language: LanguageInfo) -> ParsedQuery:
-        """Parse non-English query using LLM for intent classification + translation."""
-        try:
-            prompt = self.QUERY_PARSE_PROMPT.format(query=query)
-            raw = self._llm.generate(
-                prompt,
-                max_tokens=256,
-                timeout=30.0,
-            )
-            result = parse_llm_json(raw)
-
-            # Map intent string to enum
-            intent_map = {v.value: v for v in QueryIntent}
-            intent = intent_map.get(result.get("intent", ""), QueryIntent.GENERAL)
-
-            # Map time_scope string to enum
-            scope_map = {v.value: v for v in TimeScope}
-            time_scope = scope_map.get(result.get("time_scope", ""), TimeScope.ALL_TIME)
-
-            english_query = result.get("english_query", query)
-
-            # expanded_queries: original + English translation (both searched)
-            expanded = [query, english_query]
-            # Add intent-based expansions on the English translation
-            english_expansions = self._generate_expansions(
-                english_query.lower(), intent, result.get("entities", [])
-            )
-            for exp in english_expansions:
-                if exp not in expanded:
-                    expanded.append(exp)
-
-            return ParsedQuery(
-                original=query,
-                cleaned=query,
-                intent=intent,
-                time_scope=time_scope,
-                entities=result.get("entities", []),
-                keywords=result.get("keywords", []),
-                expanded_queries=expanded[:7],
-                language=language,
-            )
-        except Exception as e:
-            logger.warning("LLM parsing failed: %s", e)
-            # Fallback to regex parsing
-            return self._parse_english(query, language)
-
-    def _clean_query(self, query: str) -> str:
-        """Clean and normalize query text"""
-        # Lowercase
-        cleaned = query.lower().strip()
-
-        # Remove extra whitespace
-        cleaned = re.sub(r'\s+', ' ', cleaned)
-
-        # Remove trailing punctuation (but keep question marks)
-        cleaned = re.sub(r'[.!,;:]+$', '', cleaned)
-
-        return cleaned
-
-    def _detect_intent(self, query: str) -> QueryIntent:
-        """Detect the primary intent of the query"""
-        query_lower = query.lower()
-
-        for intent, patterns in self.INTENT_PATTERNS.items():
-            for pattern in patterns:
-                if re.search(pattern, query_lower, re.IGNORECASE):
-                    return intent
-
-        return QueryIntent.GENERAL
-
-    def _detect_time_scope(self, query: str) -> TimeScope:
-        """Detect time scope from query"""
-        query_lower = query.lower()
-
-        for scope, patterns in self.TIME_PATTERNS.items():
-            for pattern in patterns:
-                if re.search(pattern, query_lower, re.IGNORECASE):
-                    return scope
-
-        return TimeScope.ALL_TIME
-
-    def _extract_entities(self, query: str) -> List[str]:
-        """Extract named entities from query"""
-        entities = []
-
-        # Extract quoted strings
-        quoted = re.findall(r'"([^"]+)"|\'([^\']+)\'', query)
-        for q in quoted:
-            entity = q[0] or q[1]
-            if entity and len(entity) > 1:
-                entities.append(entity)
-
-        # Extract capitalized words/phrases (potential proper nouns)
-        # But not at the start of sentences
-        words = query.split()
-        for i, word in enumerate(words):
-            if i > 0 and word[0].isupper() and len(word) > 1:
-                # Check if it's a multi-word entity
-                phrase = [word]
-                j = i + 1
-                while j < len(words) and words[j][0].isupper():
-                    phrase.append(words[j])
-                    j += 1
-                entity = ' '.join(phrase)
-                if entity not in entities:
-                    entities.append(entity)
-
-        # Extract technology names (common patterns)
-        tech_patterns = [
-            r'\b(PostgreSQL|MySQL|MongoDB|Redis|Elasticsearch|Kafka)\b',
-            r'\b(React|Vue|Angular|Next\.js|Node\.js|Python|Java|Go)\b',
-            r'\b(AWS|GCP|Azure|Kubernetes|Docker|Terraform)\b',
-            r'\b(REST|GraphQL|gRPC|WebSocket|HTTP|HTTPS)\b',
-        ]
-        for pattern in tech_patterns:
-            matches = re.findall(pattern, query, re.IGNORECASE)
-            entities.extend(matches)
-
-        # Deduplicate and return
-        return list(dict.fromkeys(entities))[:10]
-
-    def _extract_keywords(self, query: str) -> List[str]:
-        """Extract important keywords from query"""
-        # Split into words
-        words = re.findall(r'\b\w+\b', query.lower())
-
-        # Filter stop words and short words
-        keywords = [
-            w for w in words
-            if w not in self.STOP_WORDS and len(w) > 2
-        ]
-
-        # Deduplicate and return
-        return list(dict.fromkeys(keywords))[:15]
-
-    def _generate_expansions(
-        self,
-        query: str,
-        intent: QueryIntent,
-        entities: List[str]
-    ) -> List[str]:
-        """Generate query expansions for better recall"""
-        expansions = [query]  # Include original
-
-        # Intent-based expansions
-        if intent == QueryIntent.DECISION_RATIONALE:
-            expansions.extend([
-                f"decision {query}",
-                f"rationale {query}",
-                f"trade-off {query}",
-            ])
-        elif intent == QueryIntent.FEATURE_HISTORY:
-            expansions.extend([
-                f"customer request {query}",
-                f"feature rejected {query}",
-            ])
-        elif intent == QueryIntent.PATTERN_LOOKUP:
-            expansions.extend([
-                f"standard approach {query}",
-                f"best practice {query}",
-            ])
-        elif intent == QueryIntent.TECHNICAL_CONTEXT:
-            expansions.extend([
-                f"architecture {query}",
-                f"implementation {query}",
-            ])
-
-        # Entity-based expansions
-        for entity in entities[:3]:
-            expansions.append(f"{entity} decision")
-            expansions.append(f"why {entity}")
-
-        # Deduplicate and limit
-        seen = set()
-        unique = []
-        for exp in expansions:
-            if exp.lower() not in seen:
-                seen.add(exp.lower())
-                unique.append(exp)
-
-        return unique[:5]
-
-    def format_for_search(self, parsed: ParsedQuery) -> str:
-        """
-        Format parsed query for enVector search.
-
-        Combines original query with key entities and keywords
-        for better semantic matching.
-        """
-        parts = [parsed.cleaned]
-
-        # Add entities
-        if parsed.entities:
-            parts.append("entities: " + ", ".join(parsed.entities[:3]))
-
-        # Add keywords
-        if parsed.keywords:
-            parts.append("keywords: " + ", ".join(parsed.keywords[:5]))
-
-        return " | ".join(parts)
diff --git a/agents/retriever/searcher.py b/agents/retriever/searcher.py
deleted file mode 100644
index e1d82bc..0000000
--- a/agents/retriever/searcher.py
+++ /dev/null
@@ -1,576 +0,0 @@
-"""
-Searcher
-
-Searches organizational memory via enVector.
-Uses the Vault-secured pipeline: scoring → decrypt → metadata.
-Returns Decision Records with their payload.text for synthesis.
-
-v0.2.4 changes:
-- Recency weighting on returned results (benign re-ranking)
-- Group assembly from already-fetched results (no over-fetch)
-- Client-side metadata filters (best-effort on returned top-k)
-
-NOTE: Full metadata filtering, group assembly, and recency weighting
-should happen Vault-side (rune-admin) to preserve the security model.
-The client NEVER requests more than the user's topk from Vault.
-"""
-
-import json
-import logging
-from datetime import datetime, timedelta, timezone
-from typing import List, Dict, Any, Optional
-from dataclasses import dataclass, field
-
-from ..common.envector_client import EnVectorClient
-from ..common.embedding_service import EmbeddingService
-from .query_processor import ParsedQuery, TimeScope
-
-logger = logging.getLogger("rune.retriever.searcher")
-
-# Recency weighting parameters (applied client-side on returned top-k)
-HALF_LIFE_DAYS = 90
-SIMILARITY_WEIGHT = 0.7
-RECENCY_WEIGHT = 0.3
-
-STATUS_MULTIPLIER = {
-    "accepted": 1.0,
-    "proposed": 0.9,
-    "superseded": 0.5,
-    "reverted": 0.3,
-}
-
-
-@dataclass
-class SearchResult:
-    """A single search result from enVector"""
-    record_id: str
-    title: str
-    payload_text: str  # The key output for synthesis
-    domain: str
-    certainty: str
-    status: str
-    score: float
-    reusable_insight: str = ""  # Schema 2.1+: dense NL gist (primary embedding text)
-    adjusted_score: float = 0.0  # After recency weighting + status penalty
-    metadata: Dict[str, Any] = field(default_factory=dict)
-    # Group fields (phase_chain or bundle)
-    group_id: Optional[str] = None
-    group_type: Optional[str] = None
-    phase_seq: Optional[int] = None
-    phase_total: Optional[int] = None
-
-    @property
-    def is_reliable(self) -> bool:
-        """Check if result has reliable evidence"""
-        return self.certainty in ("supported", "partially_supported")
-
-    @property
-    def is_phase(self) -> bool:
-        """Check if this result is part of a group (phase_chain or bundle)"""
-        return self.group_id is not None
-
-    @property
-    def summary(self) -> str:
-        """Short summary for display"""
-        return f"{self.title} ({self.domain}, {self.certainty})"
-
-
-class Searcher:
-    """
-    Searches organizational memory using enVector.
-
-    Uses the Vault-secured pipeline (scoring → decrypt → metadata)
-    when a vault_client is provided. Falls back to direct search otherwise.
-
-    Security model: the client NEVER requests more than the user's topk
-    from Vault. Over-fetch + post-filter must happen Vault-side.
-
-    v0.2.4 client-side enhancements (on already-returned results only):
-    - Assemble groups from results already in the result set
-    - Apply recency weighting (re-ranking, not filtering)
-    - Best-effort metadata filters (reduces result count, not ideal)
-    """
-
-    def __init__(
-        self,
-        envector_client: EnVectorClient,
-        embedding_service: EmbeddingService,
-        index_name: str,
-        vault_client=None,
-    ):
-        self._client = envector_client
-        self._embedding = embedding_service
-        self._index_name = index_name
-        self._vault = vault_client
-
-    async def search(
-        self,
-        query: ParsedQuery,
-        topk: Optional[int] = None,
-        filters: Optional[Dict[str, Any]] = None,
-    ) -> List[SearchResult]:
-        """
-        Search for relevant Decision Records.
-
-        Args:
-            query: Parsed query from QueryProcessor
-            topk: Number of results to return (passed to Vault as-is)
-            filters: Optional metadata filters (best-effort client-side;
-                     full support requires Vault-side implementation):
-                - domain: str (e.g. "architecture")
-                - status: str (e.g. "accepted")
-                - since: str (ISO date, e.g. "2026-01-01")
-
-        Returns:
-            List of SearchResult objects sorted by adjusted relevance
-        """
-        topk = topk or 10
-
-        # Step 1: Search with multi-query expansion (respects Vault's topk limit)
-        all_results = await self._search_with_expansions(query, topk)
-
-        # Step 2: Expand phase chains for groups with missing siblings
-        all_results = await self._expand_phase_chains(all_results)
-
-        # Step 3: Assemble groups (order by phase_seq, interleave with standalone)
-        all_results = self._assemble_groups(all_results)
-
-        # Step 4: Best-effort metadata filters (client-side, on complete results)
-        # NOTE: This may reduce result count below topk. Full support
-        # requires Vault-side filtering with internal over-fetch.
-        if filters:
-            all_results = self._apply_metadata_filters(all_results, filters)
-
-        # Step 5: Time scope filter
-        if query.time_scope != TimeScope.ALL_TIME:
-            all_results = self._filter_by_time(all_results, query.time_scope)
-
-        # Step 6: Recency weighting (re-ranks returned results, no security issue)
-        all_results = self._apply_recency_weighting(all_results)
-
-        return all_results[:topk]
-
-    async def _search_with_expansions(
-        self, query: ParsedQuery, topk: int
-    ) -> List[SearchResult]:
-        """Search with multiple query expansions, dedup results."""
-        all_results = []
-        seen_ids = set()
-
-        for expanded_query in query.expanded_queries[:3]:
-            results = await self._search_single(expanded_query, topk)
-            for result in results:
-                if result.record_id not in seen_ids:
-                    seen_ids.add(result.record_id)
-                    all_results.append(result)
-
-        # Also search with original query
-        if query.original not in query.expanded_queries:
-            results = await self._search_single(query.original, topk)
-            for result in results:
-                if result.record_id not in seen_ids:
-                    seen_ids.add(result.record_id)
-                    all_results.append(result)
-
-        all_results.sort(key=lambda r: r.score, reverse=True)
-        return all_results
-
-    def _assemble_groups(self, results: List[SearchResult]) -> List[SearchResult]:
-        """
-        Assemble group members from already-fetched results.
-
-        When multiple phases of the same group are in the result set,
-        group them together ordered by phase_seq. Does NOT over-fetch;
-        missing siblings are handled by _expand_phase_chains.
-        """
-        if not results:
-            return results
-
-        groups: Dict[str, List[SearchResult]] = {}
-        group_best_score: Dict[str, float] = {}
-        standalone = []
-
-        for r in results:
-            if r.is_phase and r.group_id:
-                groups.setdefault(r.group_id, []).append(r)
-                group_best_score[r.group_id] = max(
-                    group_best_score.get(r.group_id, 0.0), r.score
-                )
-            else:
-                standalone.append(r)
-
-        if not groups:
-            return results
-
-        for gid in groups:
-            groups[gid].sort(key=lambda r: r.phase_seq if r.phase_seq is not None else 0)
-
-        # Interleave: insert groups at their best-score position
-        all_items = []
-        for r in standalone:
-            all_items.append((r.score, "standalone", r))
-        for gid, best_score in group_best_score.items():
-            all_items.append((best_score, "group", gid))
-
-        all_items.sort(key=lambda x: x[0], reverse=True)
-
-        assembled = []
-        inserted_groups = set()
-        for score, item_type, item in all_items:
-            if item_type == "standalone":
-                assembled.append(item)
-            elif item_type == "group" and item not in inserted_groups:
-                inserted_groups.add(item)
-                assembled.extend(groups[item])
-
-        return assembled
-
-    def _apply_metadata_filters(
-        self, results: List[SearchResult], filters: Dict[str, Any]
-    ) -> List[SearchResult]:
-        """
-        Best-effort metadata filters on already-returned results.
-
-        WARNING: This reduces result count and may return fewer than topk.
-        Full metadata filtering requires Vault-side implementation with
-        internal over-fetch to maintain result count.
-        """
-        filtered = results
-
-        domain = filters.get("domain")
-        if domain:
-            filtered = [r for r in filtered if r.domain == domain]
-
-        status = filters.get("status")
-        if status:
-            filtered = [r for r in filtered if r.status == status]
-
-        since = filters.get("since")
-        if since:
-            filtered = self._filter_since(filtered, since)
-
-        return filtered
-
-    def _filter_since(self, results: List[SearchResult], since_date: str) -> List[SearchResult]:
-        """Filter results after a given ISO date."""
-        filtered = []
-        for r in results:
-            ts_str = r.metadata.get("timestamp")
-            if ts_str:
-                try:
-                    if isinstance(ts_str, str):
-                        ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
-                    else:
-                        ts = datetime.fromtimestamp(float(ts_str), tz=timezone.utc)
-                    if ts.isoformat() >= since_date:
-                        filtered.append(r)
-                except (ValueError, TypeError):
-                    filtered.append(r)
-            else:
-                filtered.append(r)
-        return filtered
-
-    def _apply_recency_weighting(self, results: List[SearchResult]) -> List[SearchResult]:
-        """
-        Apply time decay and status-based scoring on returned results.
-
-        This is a benign re-ranking of already-returned results.
-        No security concern: the client only sees what Vault already returned.
-        """
-        now = datetime.now(timezone.utc)
-
-        for r in results:
-            age_days = 0
-            ts_str = r.metadata.get("timestamp")
-            if ts_str:
-                try:
-                    if isinstance(ts_str, str):
-                        ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
-                    else:
-                        ts = datetime.fromtimestamp(float(ts_str), tz=timezone.utc)
-                    age_days = max(0, (now - ts).days)
-                except (ValueError, TypeError):
-                    pass
-
-            decay = 0.5 ** (age_days / HALF_LIFE_DAYS) if HALF_LIFE_DAYS > 0 else 1.0
-            status_mult = STATUS_MULTIPLIER.get(r.status, 1.0)
-            r.adjusted_score = (SIMILARITY_WEIGHT * r.score + RECENCY_WEIGHT * decay) * status_mult
-
-        results.sort(key=lambda r: r.adjusted_score, reverse=True)
-        return results
-
-    # ================================================================
-    # Phase chain expansion (additional searches for missing siblings)
-    # ================================================================
-
-    async def _expand_phase_chains(
-        self,
-        results: List[SearchResult],
-        max_chains: int = 2,
-    ) -> List[SearchResult]:
-        """
-        Expand phase chain results by fetching sibling phases.
-
-        When a search result is part of a phase chain, searches for the
-        group_id to retrieve all sibling phases and inserts them in order.
-        Each sibling search respects Vault's topk limit.
-        """
-        seen_groups = set()
-        groups_to_expand = []
-        for r in results:
-            if r.is_phase and r.group_id not in seen_groups:
-                # Check if all siblings are already present
-                total = r.phase_total or 0
-                present = sum(1 for x in results if x.group_id == r.group_id)
-                if present < total:
-                    seen_groups.add(r.group_id)
-                    groups_to_expand.append(r.group_id)
-
-        if not groups_to_expand:
-            return results
-
-        groups_to_expand = groups_to_expand[:max_chains]
-
-        group_siblings: Dict[str, List[SearchResult]] = {}
-        existing_ids = {r.record_id for r in results}
-
-        for group_id in groups_to_expand:
-            siblings = await self._search_single(f"Group: {group_id}", topk=10)
-            chain = [s for s in siblings if s.group_id == group_id and s.record_id not in existing_ids]
-            chain.sort(key=lambda s: s.phase_seq if s.phase_seq is not None else 0)
-            group_siblings[group_id] = chain
-
-        expanded = []
-        expanded_ids = set()
-
-        for r in results:
-            if r.record_id in expanded_ids:
-                continue
-
-            if r.is_phase and r.group_id in group_siblings:
-                # Insert all siblings (including this one) in phase order
-                all_in_group = [x for x in results if x.group_id == r.group_id] + group_siblings[r.group_id]
-                seen_in_group = set()
-                all_in_group.sort(key=lambda x: x.phase_seq if x.phase_seq is not None else 0)
-                for sibling in all_in_group:
-                    if sibling.record_id not in expanded_ids and sibling.record_id not in seen_in_group:
-                        expanded.append(sibling)
-                        expanded_ids.add(sibling.record_id)
-                        seen_in_group.add(sibling.record_id)
-                del group_siblings[r.group_id]
-            elif r.record_id not in expanded_ids:
-                expanded.append(r)
-                expanded_ids.add(r.record_id)
-
-        return expanded
-
-    # ================================================================
-    # Low-level search methods
-    # ================================================================
-
-    async def _search_single(self, query_text: str, topk: int) -> List[SearchResult]:
-        """Execute a single search query via Vault-secured pipeline."""
-        return await self._search_via_vault(query_text, topk)
-
-    async def _search_via_vault(self, query_text: str, topk: int) -> List[SearchResult]:
-        """
-        Vault-secured search pipeline:
-        1. Embed query → encrypted similarity scoring on enVector Cloud
-        2. Vault decrypts result ciphertext, selects top-k
-        3. Retrieve encrypted metadata from enVector Cloud
-        4. Vault decrypts metadata
-        """
-        try:
-            query_vector = self._embedding.embed_single(query_text)
-
-            scoring_result = self._client.score(self._index_name, query_vector)
-            if not scoring_result.get("ok"):
-                logger.warning("Scoring failed: %s", scoring_result.get("error"))
-                return []
-
-            blobs = scoring_result.get("encrypted_blobs", [])
-            if not blobs:
-                return []
-
-            vault_result = await self._vault.decrypt_search_results(
-                encrypted_blob_b64=blobs[0],
-                top_k=topk,
-            )
-            if not vault_result.ok:
-                logger.warning("Vault decrypt failed: %s", vault_result.error)
-                return []
-
-            if not vault_result.results:
-                return []
-
-            metadata_result = self._client.remind(
-                self._index_name,
-                vault_result.results,
-                output_fields=["metadata"],
-            )
-            if not metadata_result.get("ok"):
-                logger.warning("Metadata retrieval failed: %s", metadata_result.get("error"))
-                return []
-
-            encrypted_entries = metadata_result.get("results", [])
-
-            vault_decrypt_items = []
-            for idx, entry in enumerate(encrypted_entries):
-                data = entry.get("data", "")
-                if not data:
-                    continue
-
-                try:
-                    parsed = json.loads(data)
-                    if isinstance(parsed, dict) and "a" in parsed and "c" in parsed:
-                        vault_decrypt_items.append((idx, data))
-                    else:
-                        entry["metadata"] = parsed
-                        entry.pop("data", None)
-                except (json.JSONDecodeError, TypeError):
-                    import base64
-                    try:
-                        raw = base64.b64decode(data)
-                        parsed = json.loads(raw)
-                        entry["metadata"] = parsed
-                        entry.pop("data", None)
-                    except Exception:
-                        logger.warning("Entry %d: unrecognized metadata format, skipping", idx)
-                        entry["metadata"] = {}
-                        entry.pop("data", None)
-
-            if vault_decrypt_items:
-                try:
-                    decrypted_metadata = await self._vault.decrypt_metadata(
-                        encrypted_metadata_list=[data for _, data in vault_decrypt_items]
-                    )
-                    for dec_idx, (entry_idx, _) in enumerate(vault_decrypt_items):
-                        if dec_idx < len(decrypted_metadata):
-                            encrypted_entries[entry_idx]["metadata"] = decrypted_metadata[dec_idx]
-                            encrypted_entries[entry_idx].pop("data", None)
-                except Exception:
-                    logger.info("Batch decrypt failed, falling back to per-entry decrypt")
-                    for entry_idx, data in vault_decrypt_items:
-                        try:
-                            single = await self._vault.decrypt_metadata(
-                                encrypted_metadata_list=[data]
-                            )
-                            if single:
-                                encrypted_entries[entry_idx]["metadata"] = single[0]
-                                encrypted_entries[entry_idx].pop("data", None)
-                        except Exception as e:
-                            logger.debug("Entry %d decrypt failed: %s", entry_idx, e)
-                            encrypted_entries[entry_idx]["metadata"] = {}
-                            encrypted_entries[entry_idx].pop("data", None)
-
-            return [self._to_search_result(r) for r in encrypted_entries]
-
-        except Exception as e:
-            logger.error("Vault search error: %s", e, exc_info=True)
-            return []
-
-    def _to_search_result(self, raw: Dict[str, Any]) -> SearchResult:
-        """Convert raw result to SearchResult"""
-        metadata = raw.get("metadata", {})
-
-        record_id = metadata.get("id", raw.get("id", "unknown"))
-        title = metadata.get("title", "Untitled")
-        domain = metadata.get("domain", "general")
-        status = metadata.get("status", "unknown")
-
-        why = metadata.get("why", {})
-        if isinstance(why, dict):
-            certainty = why.get("certainty", "unknown")
-        else:
-            certainty = "unknown"
-
-        payload = metadata.get("payload", {})
-        if isinstance(payload, dict):
-            payload_text = payload.get("text", "")
-        else:
-            payload_text = metadata.get("text", raw.get("text", ""))
-
-        if not payload_text:
-            decision = metadata.get("decision", {})
-            if isinstance(decision, dict):
-                payload_text = decision.get("what", "")
-
-        reusable_insight = metadata.get("reusable_insight", "")
-
-        group_id = metadata.get("group_id")
-        group_type = metadata.get("group_type")
-        phase_seq = metadata.get("phase_seq")
-        phase_total = metadata.get("phase_total")
-
-        score = raw.get("score", 0.0)
-        return SearchResult(
-            record_id=record_id,
-            title=title,
-            payload_text=payload_text,
-            domain=domain,
-            certainty=certainty,
-            status=status,
-            score=score,
-            reusable_insight=reusable_insight,
-            adjusted_score=score,
-            metadata=metadata,
-            group_id=group_id,
-            group_type=group_type,
-            phase_seq=phase_seq,
-            phase_total=phase_total,
-        )
-
-    def _filter_by_time(
-        self,
-        results: List[SearchResult],
-        time_scope: TimeScope
-    ) -> List[SearchResult]:
-        """Filter results by time scope"""
-        now = datetime.now(timezone.utc)
-
-        time_ranges = {
-            TimeScope.LAST_WEEK: timedelta(days=7),
-            TimeScope.LAST_MONTH: timedelta(days=30),
-            TimeScope.LAST_QUARTER: timedelta(days=90),
-            TimeScope.LAST_YEAR: timedelta(days=365),
-        }
-
-        if time_scope not in time_ranges:
-            return results
-
-        cutoff = now - time_ranges[time_scope]
-        filtered = []
-
-        for result in results:
-            timestamp_str = result.metadata.get("timestamp")
-            if timestamp_str:
-                try:
-                    if isinstance(timestamp_str, str):
-                        ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
-                    else:
-                        ts = datetime.fromtimestamp(float(timestamp_str), tz=timezone.utc)
-                    if ts >= cutoff:
-                        filtered.append(result)
-                except (ValueError, TypeError):
-                    filtered.append(result)
-            else:
-                filtered.append(result)
-
-        return filtered
-
-    async def search_by_id(self, record_id: str) -> Optional[SearchResult]:
-        """Search for a specific record by ID."""
-        results = await self._search_single(f"ID: {record_id}", topk=5)
-        for result in results:
-            if result.record_id == record_id:
-                return result
-        return None
-
-    async def get_related(self, record_id: str, topk: int = 5) -> List[SearchResult]:
-        """Find records related to a given record."""
-        record = await self.search_by_id(record_id)
-        if not record:
-            return []
-        search_text = record.reusable_insight.strip() or record.payload_text[:500]
-        results = await self._search_single(search_text, topk + 1)
-        return [r for r in results if r.record_id != record_id][:topk]
diff --git a/agents/retriever/synthesizer.py b/agents/retriever/synthesizer.py
deleted file mode 100644
index 79c3708..0000000
--- a/agents/retriever/synthesizer.py
+++ /dev/null
@@ -1,482 +0,0 @@
-"""
-Synthesizer
-
-LLM-based answer synthesis from search results.
-Uses payload.text from Decision Records to generate coherent answers.
-
-Key principle: Respect certainty levels from evidence.
-- "supported" → confident answer
-- "partially_supported" → "likely" or "based on partial evidence"
-- "unknown" → "uncertain" or "no clear evidence found"
-"""
-
-import logging
-from typing import List, Optional, Dict, Any
-from dataclasses import dataclass, field
-
-from .searcher import SearchResult
-from .query_processor import ParsedQuery
-from ..common.llm_client import LLMClient
-
-logger = logging.getLogger("rune.retriever.synthesizer")
-
-
-@dataclass
-class SynthesizedAnswer:
-    """Synthesized answer from LLM"""
-    answer: str
-    confidence: float  # 0.0 to 1.0
-    sources: List[Dict[str, Any]]
-    related_queries: List[str] = field(default_factory=list)
-    warnings: List[str] = field(default_factory=list)  # e.g., "based on uncertain evidence"
-
-
-# Synthesis prompt template
-SYNTHESIS_PROMPT = """You are an AI assistant that answers questions based on organizational decision records.
-
-{language_instruction}
-
-Your task is to synthesize an answer from the search results below. Follow these rules strictly:
-
-1. ONLY use information from the provided records. Do NOT make up information.
-2. Respect the certainty level of each record:
-   - "supported": You can state this confidently
-   - "partially_supported": Qualify with "likely" or "based on available evidence"
-   - "unknown": State "uncertain" or "no clear evidence found"
-3. Always cite sources by their record ID
-4. If no relevant information is found, say "No relevant records found in organizational memory."
-5. Be concise but complete.
-
-User Question: {query}
-
-Search Results (Decision Records):
-{records}
-
-Instructions:
-- Synthesize a clear, direct answer to the question
-- Cite record IDs in brackets like [dec_2024-01-01_arch_example]
-- Note any uncertainty from records with "unknown" or "partially_supported" certainty
-- Suggest follow-up queries if helpful
-
-Your Answer:"""
-
-
-# Fallback templates per language
-FALLBACK_TEMPLATES = {
-    "en": """## Search Results for: "{query}"
-
-Found {count} relevant record(s):
-
-{formatted_results}
-
----
-**Note**: This is a direct listing without LLM synthesis.
-Configure an LLM provider key (Anthropic/OpenAI/Google) for natural language answers.
-""",
-    "ko": """## "{query}" 검색 결과
-
-{count}개의 관련 레코드를 찾았습니다:
-
-{formatted_results}
-
----
-**참고**: LLM 합성 없이 직접 목록을 표시합니다.
-자연어 답변을 위해 LLM 제공자 키(Anthropic/OpenAI/Google)를 설정하세요.
-""",
-    "ja": """## "{query}" の検索結果
-
-{count}件の関連レコードが見つかりました:
-
-{formatted_results}
-
----
-**注意**: LLM合成なしの直接リスティングです。
-自然言語での回答にはLLMプロバイダーキー(Anthropic/OpenAI/Google)を設定してください。
-""",
-}
-
-# Keep original for backward compatibility
-FALLBACK_TEMPLATE = FALLBACK_TEMPLATES["en"]
-
-
-class Synthesizer:
-    """
-    Synthesizes answers from search results using LLM.
-
-    Falls back to simple formatting if LLM is not available.
-    """
-
-    def __init__(
-        self,
-        llm_provider: str = "anthropic",
-        anthropic_api_key: Optional[str] = None,
-        openai_api_key: Optional[str] = None,
-        google_api_key: Optional[str] = None,
-        model: str = "claude-sonnet-4-20250514"
-    ):
-        """
-        Initialize synthesizer.
-
-        Args:
-            llm_provider: LLM provider to use
-            anthropic_api_key: Anthropic API key (optional)
-            openai_api_key: OpenAI API key (optional)
-            google_api_key: Gemini API key (optional)
-            model: Model to use for synthesis
-        """
-        self._provider = llm_provider
-        self._model = model
-        self._llm = LLMClient(
-            provider=llm_provider,
-            model=model,
-            anthropic_api_key=anthropic_api_key,
-            openai_api_key=openai_api_key,
-            google_api_key=google_api_key,
-        )
-
-    @property
-    def has_llm(self) -> bool:
-        """Check if LLM is available"""
-        return self._llm.is_available
-
-    def synthesize(
-        self,
-        query: ParsedQuery,
-        results: List[SearchResult]
-    ) -> SynthesizedAnswer:
-        """
-        Synthesize an answer from search results.
-
-        Args:
-            query: Parsed user query
-            results: Search results from Searcher
-
-        Returns:
-            SynthesizedAnswer with answer text and metadata
-        """
-        if not results:
-            return SynthesizedAnswer(
-                answer="No relevant records found in organizational memory.",
-                confidence=0.0,
-                sources=[],
-                related_queries=self._suggest_alternatives(query),
-                warnings=["No search results found"],
-            )
-
-        # Try LLM synthesis first
-        if self.has_llm:
-            try:
-                return self._synthesize_with_llm(query, results)
-            except Exception as e:
-                logger.warning("LLM synthesis failed: %s", e)
-                # Fall through to fallback
-
-        # Fallback to simple formatting
-        return self._synthesize_fallback(query, results)
-
-    def _synthesize_with_llm(
-        self,
-        query: ParsedQuery,
-        results: List[SearchResult]
-    ) -> SynthesizedAnswer:
-        """Synthesize using LLM"""
-        # Format records for prompt
-        records_text = self._format_records_for_prompt(results)
-
-        # Determine language instruction
-        if query.language and not query.language.is_english:
-            language_instruction = (
-                f"IMPORTANT: The user asked in {query.language.code}. "
-                f"Respond in the SAME language ({query.language.code}). "
-                f"The source records may be in English — translate relevant parts."
-            )
-        else:
-            language_instruction = "Respond in English."
-
-        # Build prompt
-        prompt = SYNTHESIS_PROMPT.format(
-            query=query.original,
-            records=records_text,
-            language_instruction=language_instruction,
-        )
-
-        # Call LLM
-        answer_text = self._llm.generate(
-            prompt,
-            max_tokens=1024,
-            timeout=30.0,
-        )
-
-        # Calculate confidence based on results
-        confidence = self._calculate_confidence(results)
-
-        # Extract sources
-        sources = [
-            {
-                "record_id": r.record_id,
-                "title": r.title,
-                "domain": r.domain,
-                "certainty": r.certainty,
-                "score": r.score,
-            }
-            for r in results[:5]
-        ]
-
-        # Check for warnings
-        warnings = []
-        uncertain_count = sum(1 for r in results if r.certainty == "unknown")
-        if uncertain_count > 0:
-            warnings.append(f"{uncertain_count} record(s) have uncertain evidence")
-
-        partial_count = sum(1 for r in results if r.certainty == "partially_supported")
-        if partial_count > 0:
-            warnings.append(f"{partial_count} record(s) have partial evidence")
-
-        return SynthesizedAnswer(
-            answer=answer_text,
-            confidence=confidence,
-            sources=sources,
-            related_queries=self._suggest_followups(query, results),
-            warnings=warnings,
-        )
-
-    def _synthesize_fallback(
-        self,
-        query: ParsedQuery,
-        results: List[SearchResult]
-    ) -> SynthesizedAnswer:
-        """Fallback synthesis without LLM"""
-        # Format results
-        formatted_results = []
-        for i, r in enumerate(results[:5], 1):
-            certainty_marker = {
-                "supported": "✓",
-                "partially_supported": "~",
-                "unknown": "?",
-            }.get(r.certainty, "?")
-
-            formatted_results.append(f"""
-### {i}. {r.title} [{r.record_id}]
-**Domain**: {r.domain} | **Certainty**: {certainty_marker} {r.certainty} | **Score**: {r.score:.2f}
-
-{r.payload_text[:500]}{"..." if len(r.payload_text) > 500 else ""}
-""")
-
-        # Select language-specific fallback template
-        lang_code = query.language.code if query.language else "en"
-        template = FALLBACK_TEMPLATES.get(lang_code, FALLBACK_TEMPLATES["en"])
-
-        answer = template.format(
-            query=query.original,
-            count=len(results),
-            formatted_results="\n".join(formatted_results)
-        )
-
-        confidence = self._calculate_confidence(results)
-
-        sources = [
-            {
-                "record_id": r.record_id,
-                "title": r.title,
-                "domain": r.domain,
-                "certainty": r.certainty,
-                "score": r.score,
-            }
-            for r in results[:5]
-        ]
-
-        return SynthesizedAnswer(
-            answer=answer,
-            confidence=confidence,
-            sources=sources,
-            related_queries=self._suggest_followups(query, results),
-            warnings=["LLM not available - showing raw results"],
-        )
-
-    def _format_records_for_prompt(self, results: List[SearchResult]) -> str:
-        """Format search results for LLM prompt, grouping linked records."""
-        # Group results: linked groups together, standalone separate
-        groups = []  # List of (group_id_or_none, group_type, [results])
-        seen_groups = set()
-
-        for r in results:
-            if r.is_phase and r.group_id:
-                if r.group_id not in seen_groups:
-                    seen_groups.add(r.group_id)
-                    chain = [x for x in results if x.group_id == r.group_id]
-                    chain.sort(key=lambda x: x.phase_seq if x.phase_seq is not None else 0)
-                    gtype = r.group_type or "phase_chain"
-                    groups.append((r.group_id, gtype, chain))
-            elif not r.is_phase:
-                groups.append((None, None, [r]))
-
-        formatted = []
-        record_num = 1
-
-        for group_id, group_type, group_results in groups:
-            if group_id and len(group_results) > 1:
-                first = group_results[0]
-                if group_type == "bundle":
-                    # Bundle — detail facets of a single decision
-                    formatted.append(f"""
----
-Record {record_num}: Decision Bundle [{group_id}]
-Overall Domain: {first.domain}
-Facets: {len(group_results)}
-""")
-                    for facet in group_results:
-                        seq = (facet.phase_seq or 0) + 1
-                        formatted.append(f"""
-Facet {seq}: {facet.title} [{facet.record_id}]
-Certainty: {facet.certainty}
-
-{facet.payload_text[:800]}
-""")
-                else:
-                    # Phase chain — sequential reasoning
-                    formatted.append(f"""
----
-Record {record_num}: Phase Chain [{group_id}]
-Overall Domain: {first.domain}
-Phases: {len(group_results)}
-""")
-                    for phase in group_results:
-                        seq = (phase.phase_seq or 0) + 1
-                        total = phase.phase_total or len(group_results)
-                        formatted.append(f"""
-Phase {seq}/{total}: {phase.title} [{phase.record_id}]
-Certainty: {phase.certainty}
-
-{phase.payload_text[:800]}
-""")
-                formatted.append("---\n")
-                record_num += 1
-            else:
-                # Single record (standalone)
-                r = group_results[0]
-                formatted.append(f"""
----
-Record {record_num}: [{r.record_id}]
-Title: {r.title}
-Domain: {r.domain}
-Certainty: {r.certainty}
-Relevance Score: {r.score:.2f}
-
-Content:
-{r.payload_text[:1000]}
----
-""")
-                record_num += 1
-
-        return "\n".join(formatted)
-
-    def _calculate_confidence(self, results: List[SearchResult]) -> float:
-        """Calculate overall confidence from results"""
-        if not results:
-            return 0.0
-
-        # Weights for certainty levels
-        certainty_weights = {
-            "supported": 1.0,
-            "partially_supported": 0.6,
-            "unknown": 0.3,
-        }
-
-        # Weighted average of top results
-        total_weight = 0.0
-        total_score = 0.0
-
-        for i, r in enumerate(results[:5]):
-            # Position weight (higher for top results)
-            position_weight = 1.0 / (i + 1)
-
-            # Certainty weight
-            cert_weight = certainty_weights.get(r.certainty, 0.3)
-
-            # Combined weight
-            weight = position_weight * cert_weight * r.score
-            total_weight += weight
-            total_score += weight
-
-        if total_weight == 0:
-            return 0.0
-
-        # Normalize to 0-1 range
-        confidence = min(1.0, total_score / 2.0)  # Divide by 2 for reasonable scaling
-
-        return round(confidence, 2)
-
-    def _suggest_alternatives(self, query: ParsedQuery) -> List[str]:
-        """Suggest alternative queries when no results found"""
-        suggestions = []
-
-        # Broader search suggestions
-        if query.entities:
-            for entity in query.entities[:2]:
-                suggestions.append(f"Tell me about {entity}")
-
-        # By intent
-        if query.intent.value != "general":
-            suggestions.append(f"What decisions have we made about {' '.join(query.keywords[:3])}")
-
-        # Generic
-        suggestions.append("What recent decisions have we made?")
-
-        return suggestions[:3]
-
-    def _suggest_followups(
-        self,
-        query: ParsedQuery,
-        results: List[SearchResult]
-    ) -> List[str]:
-        """Suggest follow-up queries based on results"""
-        suggestions = []
-
-        # Based on result domains
-        domains = set(r.domain for r in results[:3])
-        for domain in domains:
-            if domain != "general":
-                suggestions.append(f"What other {domain} decisions have we made?")
-
-        # Based on entities in results
-        for r in results[:2]:
-            if r.title:
-                # Extract key term from title
-                words = r.title.split()[:3]
-                if words:
-                    suggestions.append(f"Why did we decide on {' '.join(words)}?")
-
-        # Generic follow-ups
-        suggestions.append("What were the alternatives considered?")
-        suggestions.append("Who was involved in this decision?")
-
-        return suggestions[:3]
-
-
-def format_answer_for_display(answer: SynthesizedAnswer) -> str:
-    """Format synthesized answer for CLI/UI display"""
-    lines = [
-        answer.answer,
-        "",
-        f"**Confidence**: {answer.confidence:.0%}",
-    ]
-
-    if answer.warnings:
-        lines.append("")
-        lines.append("**Warnings**:")
-        for w in answer.warnings:
-            lines.append(f"  - {w}")
-
-    if answer.sources:
-        lines.append("")
-        lines.append("**Sources**:")
-        for s in answer.sources[:3]:
-            lines.append(f"  - [{s['record_id']}] {s['title']} ({s['certainty']})")
-
-    if answer.related_queries:
-        lines.append("")
-        lines.append("**Related queries**:")
-        for q in answer.related_queries:
-            lines.append(f"  - {q}")
-
-    return "\n".join(lines)
diff --git a/agents/scribe/__init__.py b/agents/scribe/__init__.py
deleted file mode 100644
index 029b66e..0000000
--- a/agents/scribe/__init__.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""
-Scribe Agent - Organizational Context Capture
-
-Monitors team communications (Slack, GitHub, Notion) to identify and capture
-significant decisions using on-device similarity search.
-
-Key Components:
-- DecisionDetector: Pattern-based decision detection
-- RecordBuilder: Creates Decision Record with evidence
-- ReviewQueue: Manages human review for low-confidence captures
-- Handlers: Source-specific event processing (Slack, GitHub, etc.)
-
-10 Rules for Scribe:
-1. Not a logger - only capture significant decisions
-2. Schema v2 only - JSON + payload.text
-3. Why cannot be confirmed without evidence
-4. Evidence requires at least 1 quote
-5. Quotes should be 1-2 sentences (direct)
-6. No assumptions about finality without explicit signals
-7. Update status on decision reversal
-8. Default sensitivity to 'internal' when unclear
-9. Remove PII/credentials, note in review_notes
-10. Output always includes JSON + payload.text
-"""
-
-from .detector import DecisionDetector, DetectionResult
-from .record_builder import RecordBuilder, RawEvent
-from .review_queue import ReviewQueue, ReviewItem, ReviewAnswers
-
-__all__ = [
-    "DecisionDetector",
-    "DetectionResult",
-    "RecordBuilder",
-    "RawEvent",
-    "ReviewQueue",
-    "ReviewItem",
-    "ReviewAnswers",
-]
-
-# Legacy (available via direct import but not promoted)
-# from .tier2_filter import Tier2Filter
-# from .llm_extractor import LLMExtractor
-# from .pattern_parser import parse_capture_triggers
diff --git a/agents/scribe/detector.py b/agents/scribe/detector.py
deleted file mode 100644
index 86157e4..0000000
--- a/agents/scribe/detector.py
+++ /dev/null
@@ -1,225 +0,0 @@
-"""
-Decision Detector
-
-Similarity-based decision detection using pre-embedded patterns.
-Core component of the Scribe agent's Stage 1 pipeline.
-"""
-
-from dataclasses import dataclass
-from typing import Optional, List, Tuple
-
-from ..common.pattern_cache import PatternCache, PatternEntry
-
-
-@dataclass
-class DetectionResult:
-    """Result of decision detection"""
-    is_significant: bool
-    confidence: float
-    matched_pattern: Optional[str] = None
-    category: Optional[str] = None
-    domain: Optional[str] = None
-    priority: Optional[str] = None
-    top_matches: Optional[List[Tuple[str, float]]] = None  # For debugging
-
-
-class DecisionDetector:
-    """
-    Detects significant decisions using similarity search.
-
-    Algorithm:
-    1. Embed incoming text
-    2. Compute similarity to all pre-embedded patterns
-    3. If max similarity > threshold, mark as significant
-    4. Return detection result with confidence and matched pattern
-
-    This replaces ML-based classification with on-device similarity search.
-    """
-
-    def __init__(
-        self,
-        pattern_cache: PatternCache,
-        threshold: float = 0.35,
-        high_confidence_threshold: float = 0.7
-    ):
-        """
-        Initialize decision detector.
-
-        Args:
-            pattern_cache: PatternCache with pre-embedded patterns
-            threshold: Minimum similarity to consider significant
-            high_confidence_threshold: Threshold for auto-capture (no review)
-        """
-        self._cache = pattern_cache
-        self._threshold = threshold
-        self._high_confidence_threshold = high_confidence_threshold
-
-    @property
-    def threshold(self) -> float:
-        return self._threshold
-
-    @property
-    def high_confidence_threshold(self) -> float:
-        return self._high_confidence_threshold
-
-    def detect(self, text: str) -> DetectionResult:
-        """
-        Detect if text contains a significant decision.
-
-        Args:
-            text: Input text to analyze
-
-        Returns:
-            DetectionResult with significance, confidence, and matched pattern
-        """
-        if not text or not text.strip():
-            return DetectionResult(
-                is_significant=False,
-                confidence=0.0,
-            )
-
-        # Skip very short messages
-        if len(text.strip()) < 20:
-            return DetectionResult(
-                is_significant=False,
-                confidence=0.0,
-            )
-
-        # Find best matching pattern
-        match, score = self._cache.find_best_match(text, threshold=0.0)
-
-        # Determine significance
-        is_significant = score >= self._threshold
-
-        if match:
-            return DetectionResult(
-                is_significant=is_significant,
-                confidence=score,
-                matched_pattern=match.text,
-                category=match.category,
-                domain=match.domain,
-                priority=match.priority,
-            )
-
-        return DetectionResult(
-            is_significant=False,
-            confidence=score,
-        )
-
-    def detect_with_details(self, text: str, top_k: int = 5) -> DetectionResult:
-        """
-        Detect with additional details including top matches.
-
-        Useful for debugging and understanding why a decision was detected.
-
-        Args:
-            text: Input text to analyze
-            top_k: Number of top matches to include
-
-        Returns:
-            DetectionResult with top_matches for debugging
-        """
-        if not text or not text.strip():
-            return DetectionResult(
-                is_significant=False,
-                confidence=0.0,
-                top_matches=[],
-            )
-
-        # Find top matches
-        matches = self._cache.find_top_matches(text, top_k=top_k, threshold=0.0)
-
-        if not matches:
-            return DetectionResult(
-                is_significant=False,
-                confidence=0.0,
-                top_matches=[],
-            )
-
-        # Best match
-        best_pattern, best_score = matches[0]
-        is_significant = best_score >= self._threshold
-
-        # Format top matches for debugging
-        top_matches = [(p.text, s) for p, s in matches]
-
-        return DetectionResult(
-            is_significant=is_significant,
-            confidence=best_score,
-            matched_pattern=best_pattern.text,
-            category=best_pattern.category,
-            domain=best_pattern.domain,
-            priority=best_pattern.priority,
-            top_matches=top_matches,
-        )
-
-    def should_auto_capture(self, result: DetectionResult) -> bool:
-        """
-        Check if detection result warrants auto-capture (skip review).
-
-        Auto-capture when:
-        - Is significant AND
-        - Confidence >= high_confidence_threshold
-
-        Args:
-            result: Detection result to evaluate
-
-        Returns:
-            True if should auto-capture, False if needs review
-        """
-        if not result.is_significant:
-            return False
-
-        return result.confidence >= self._high_confidence_threshold
-
-    def needs_review(self, result: DetectionResult) -> bool:
-        """
-        Check if detection result needs human review.
-
-        Review needed when:
-        - Is significant but confidence < high_confidence_threshold
-        - Medium priority pattern
-
-        Args:
-            result: Detection result to evaluate
-
-        Returns:
-            True if needs review, False otherwise
-        """
-        if not result.is_significant:
-            return False
-
-        return not self.should_auto_capture(result)
-
-    def explain_detection(self, result: DetectionResult) -> str:
-        """
-        Generate human-readable explanation of detection.
-
-        Args:
-            result: Detection result to explain
-
-        Returns:
-            Explanation string
-        """
-        if not result.is_significant:
-            return f"Not significant (confidence: {result.confidence:.2f}, threshold: {self._threshold})"
-
-        lines = [
-            f"Significant decision detected (confidence: {result.confidence:.2f})",
-            f"  Matched pattern: \"{result.matched_pattern}\"",
-            f"  Category: {result.category}",
-            f"  Domain: {result.domain}",
-            f"  Priority: {result.priority}",
-        ]
-
-        if self.should_auto_capture(result):
-            lines.append("  Action: AUTO-CAPTURE (high confidence)")
-        else:
-            lines.append("  Action: NEEDS REVIEW (moderate confidence)")
-
-        if result.top_matches:
-            lines.append("  Top matches:")
-            for pattern, score in result.top_matches[:3]:
-                lines.append(f"    - \"{pattern[:50]}...\" ({score:.2f})")
-
-        return "\n".join(lines)
diff --git a/agents/scribe/handlers/__init__.py b/agents/scribe/handlers/__init__.py
deleted file mode 100644
index ac45927..0000000
--- a/agents/scribe/handlers/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""
-Source Handlers
-
-Extensible handlers for different event sources.
-Each handler converts source-specific events to a common Message format.
-
-Available Handlers:
-- SlackHandler: Slack webhook events
-- NotionHandler: Notion webhook events
-
-Future Handlers:
-- GitHubHandler: PR/Issue webhooks
-"""
-
-from .base import BaseHandler, Message
-from .slack import SlackHandler
-from .notion import NotionHandler
-
-__all__ = [
-    "BaseHandler",
-    "Message",
-    "SlackHandler",
-    "NotionHandler",
-]
diff --git a/agents/scribe/handlers/base.py b/agents/scribe/handlers/base.py
deleted file mode 100644
index e4d49ec..0000000
--- a/agents/scribe/handlers/base.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""
-Base Handler
-
-Abstract base class for source-specific event handlers.
-Provides a common interface for converting events to Messages.
-"""
-
-from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
-from typing import Optional, Dict, Any
-from datetime import datetime
-
-
-@dataclass
-class Message:
-    """
-    Common message format for all sources.
-
-    This is the standardized format that Scribe works with,
-    regardless of the original source (Slack, GitHub, etc.).
-    """
-    text: str
-    user: str
-    channel: str
-    source: str  # "slack", "github", "notion", etc.
-    timestamp: str
-    thread_ts: Optional[str] = None
-    url: Optional[str] = None
-    is_bot: bool = False
-    mentions: list = field(default_factory=list)
-    reactions: list = field(default_factory=list)
-    attachments: list = field(default_factory=list)
-    raw_data: Optional[Dict[str, Any]] = None
-
-    @property
-    def datetime(self) -> Optional[datetime]:
-        """Parse timestamp to datetime"""
-        try:
-            return datetime.fromtimestamp(float(self.timestamp))
-        except (ValueError, TypeError):
-            return None
-
-    @property
-    def is_valid(self) -> bool:
-        """Check if message has minimum required fields"""
-        return bool(self.text and self.text.strip())
-
-
-class BaseHandler(ABC):
-    """
-    Abstract base class for source handlers.
-
-    Each handler must implement:
-    - parse_event: Convert raw event to Message
-    - verify_signature: Verify webhook signature (if applicable)
-    """
-
-    def __init__(self, source_name: str):
-        """
-        Initialize handler.
-
-        Args:
-            source_name: Name of the source (e.g., "slack", "github")
-        """
-        self.source_name = source_name
-
-    @abstractmethod
-    async def parse_event(self, raw_data: Dict[str, Any]) -> Optional[Message]:
-        """
-        Parse raw event data into a Message.
-
-        Args:
-            raw_data: Raw event data from the source
-
-        Returns:
-            Message object or None if event should be ignored
-        """
-        pass
-
-    @abstractmethod
-    def verify_signature(
-        self,
-        body: bytes,
-        signature: str,
-        timestamp: str
-    ) -> bool:
-        """
-        Verify the webhook signature.
-
-        Args:
-            body: Raw request body
-            signature: Signature from headers
-            timestamp: Timestamp from headers
-
-        Returns:
-            True if signature is valid
-        """
-        pass
-
-    def should_process(self, message: Message) -> bool:
-        """
-        Check if message should be processed.
-
-        Default implementation filters out:
-        - Bot messages
-        - Empty messages
-        - Very short messages
-
-        Override in subclass for source-specific filtering.
-
-        Args:
-            message: Parsed message
-
-        Returns:
-            True if message should be processed
-        """
-        # Skip invalid messages
-        if not message.is_valid:
-            return False
-
-        # Skip bot messages
-        if message.is_bot:
-            return False
-
-        # Skip very short messages
-        if len(message.text.strip()) < 20:
-            return False
-
-        return True
-
-    def extract_thread_context(self, raw_data: Dict[str, Any]) -> Optional[str]:
-        """
-        Extract thread context if message is part of a thread.
-
-        Override in subclass for source-specific implementation.
-
-        Args:
-            raw_data: Raw event data
-
-        Returns:
-            Thread context or None
-        """
-        return None
diff --git a/agents/scribe/handlers/notion.py b/agents/scribe/handlers/notion.py
deleted file mode 100644
index 0844aca..0000000
--- a/agents/scribe/handlers/notion.py
+++ /dev/null
@@ -1,260 +0,0 @@
-"""
-Notion Handler
-
-Handles Notion webhook events and converts them to Messages.
-Supports page.created, page.updated, and database.updated events
-via Notion's Send Webhooks API.
-"""
-
-import hmac
-import hashlib
-import time
-import logging
-from typing import Optional, Dict, Any, List
-
-from .base import BaseHandler, Message
-
-logger = logging.getLogger("rune.scribe.notion")
-
-
-class NotionHandler(BaseHandler):
-    """
-    Handler for Notion webhook events.
-
-    Processes:
-    - page.created: New page creation
-    - page.updated: Page content or property updates
-    - database.updated: Database schema or entry changes
-
-    Ignores:
-    - Bot/automation edits
-    - Template instantiation
-    - Very short pages (likely stubs)
-    """
-
-    def __init__(self, signing_secret: str = ""):
-        """
-        Initialize Notion handler.
-
-        Args:
-            signing_secret: Notion webhook secret for HMAC-SHA256 verification
-        """
-        super().__init__("notion")
-        self._signing_secret = signing_secret
-
-    async def parse_event(self, raw_data: Dict[str, Any]) -> Optional[Message]:
-        """
-        Parse Notion webhook event into Message.
-
-        Args:
-            raw_data: Raw Notion webhook payload
-
-        Returns:
-            Message object or None if event should be ignored
-        """
-        event_type = raw_data.get("type", "")
-
-        if event_type in ("page.created", "page.updated"):
-            return self._parse_page_event(raw_data)
-        elif event_type == "database.updated":
-            return self._parse_database_event(raw_data)
-
-        return None
-
-    def _parse_page_event(self, raw_data: Dict[str, Any]) -> Optional[Message]:
-        """Parse a page.created or page.updated event"""
-        page = raw_data.get("data", raw_data.get("page", {}))
-
-        # Extract title from properties
-        title = self._extract_title(page)
-
-        # Extract rich_text content from page blocks (if included)
-        body = self._extract_body(raw_data)
-
-        # Combine title + body
-        text = title
-        if body:
-            text = f"{title}\n\n{body}" if title else body
-
-        if not text:
-            return None
-
-        # Extract user
-        user = self._extract_user(page)
-
-        # Extract parent context as channel
-        channel = self._extract_parent_name(page)
-
-        # Extract timestamp — prefer last_edited_time, fall back to created_time
-        timestamp = self._extract_timestamp(page)
-
-        # Page URL
-        url = page.get("url")
-
-        # Detect bot/automation edits
-        is_bot = self._is_bot_edit(page)
-
-        return Message(
-            text=text,
-            user=user,
-            channel=channel,
-            source="notion",
-            timestamp=timestamp,
-            thread_ts=None,
-            url=url,
-            is_bot=is_bot,
-            raw_data=raw_data,
-        )
-
-    def _parse_database_event(self, raw_data: Dict[str, Any]) -> Optional[Message]:
-        """Parse a database.updated event"""
-        database = raw_data.get("data", raw_data.get("database", {}))
-
-        # Database title
-        title_parts = database.get("title", [])
-        title = "".join(
-            t.get("plain_text", "") for t in title_parts
-        )
-
-        if not title:
-            return None
-
-        text = f"Database updated: {title}"
-
-        # Description if available
-        description_parts = database.get("description", [])
-        if description_parts:
-            desc = "".join(t.get("plain_text", "") for t in description_parts)
-            if desc:
-                text = f"{text}\n\n{desc}"
-
-        user = self._extract_user(database)
-        timestamp = self._extract_timestamp(database)
-
-        return Message(
-            text=text,
-            user=user,
-            channel=title,
-            source="notion",
-            timestamp=timestamp,
-            thread_ts=None,
-            url=database.get("url"),
-            is_bot=self._is_bot_edit(database),
-            raw_data=raw_data,
-        )
-
-    def _extract_title(self, page: Dict[str, Any]) -> str:
-        """Extract page title from properties"""
-        properties = page.get("properties", {})
-        for prop in properties.values():
-            if prop.get("type") == "title":
-                title_parts = prop.get("title", [])
-                return "".join(t.get("plain_text", "") for t in title_parts)
-        return ""
-
-    def _extract_body(self, raw_data: Dict[str, Any]) -> str:
-        """Extract body text from rich_text blocks if included in payload"""
-        blocks = raw_data.get("blocks", raw_data.get("children", []))
-        parts: List[str] = []
-
-        for block in blocks:
-            block_type = block.get("type", "")
-            block_data = block.get(block_type, {})
-
-            # Extract rich_text from common block types
-            rich_text = block_data.get("rich_text", [])
-            text = "".join(t.get("plain_text", "") for t in rich_text)
-            if text:
-                parts.append(text)
-
-        return "\n".join(parts)
-
-    def _extract_user(self, obj: Dict[str, Any]) -> str:
-        """Extract user ID from last_edited_by or created_by"""
-        editor = obj.get("last_edited_by", obj.get("created_by", {}))
-        return editor.get("id", "unknown")
-
-    def _extract_parent_name(self, page: Dict[str, Any]) -> str:
-        """Extract parent context (database name or parent page) as channel"""
-        parent = page.get("parent", {})
-        parent_type = parent.get("type", "")
-
-        if parent_type == "database_id":
-            return f"db:{parent.get('database_id', 'unknown')}"
-        elif parent_type == "page_id":
-            return f"page:{parent.get('page_id', 'unknown')}"
-        elif parent_type == "workspace":
-            return "workspace"
-
-        return "notion"
-
-    def _extract_timestamp(self, obj: Dict[str, Any]) -> str:
-        """Extract timestamp, converting ISO to unix epoch string"""
-        iso_time = obj.get("last_edited_time", obj.get("created_time", ""))
-        if iso_time:
-            try:
-                from datetime import datetime, timezone
-                # Notion uses ISO 8601 format
-                dt = datetime.fromisoformat(iso_time.replace("Z", "+00:00"))
-                return str(dt.timestamp())
-            except (ValueError, TypeError):
-                pass
-        return str(time.time())
-
-    def _is_bot_edit(self, obj: Dict[str, Any]) -> bool:
-        """Check if edit was made by a bot/integration"""
-        editor = obj.get("last_edited_by", obj.get("created_by", {}))
-        return editor.get("type") == "bot"
-
-    def verify_signature(
-        self,
-        body: bytes,
-        signature: str,
-        timestamp: str
-    ) -> bool:
-        """
-        Verify Notion webhook signature using HMAC-SHA256.
-
-        Args:
-            body: Raw request body
-            signature: X-Notion-Signature header value
-            timestamp: X-Notion-Timestamp header value (unused for Notion,
-                       kept for BaseHandler interface compatibility)
-
-        Returns:
-            True if signature is valid
-        """
-        if not self._signing_secret:
-            return True
-
-        if not signature:
-            return False
-
-        expected_sig = hmac.new(
-            self._signing_secret.encode("utf-8"),
-            body,
-            hashlib.sha256
-        ).hexdigest()
-
-        return hmac.compare_digest(expected_sig, signature)
-
-    def should_process(self, message: Message) -> bool:
-        """
-        Check if Notion message should be processed.
-
-        Filters out:
-        - Bot/automation edits (via base class)
-        - Very short content (via base class)
-        - Template instantiation (title starts with common template markers)
-        """
-        if not super().should_process(message):
-            return False
-
-        title_line = message.text.split("\n")[0].strip()
-
-        # Skip untitled/template pages
-        skip_prefixes = ("Untitled", "Template:", "[Template]", "Copy of ")
-        if title_line.startswith(skip_prefixes):
-            return False
-
-        return True
diff --git a/agents/scribe/handlers/slack.py b/agents/scribe/handlers/slack.py
deleted file mode 100644
index 2890fde..0000000
--- a/agents/scribe/handlers/slack.py
+++ /dev/null
@@ -1,236 +0,0 @@
-"""
-Slack Handler
-
-Handles Slack webhook events and converts them to Messages.
-"""
-
-import hmac
-import hashlib
-import time
-from typing import Optional, Dict, Any, List
-
-from .base import BaseHandler, Message
-
-
-class SlackHandler(BaseHandler):
-    """
-    Handler for Slack Events API webhooks.
-
-    Processes:
-    - message events (new messages in channels)
-    - message_changed events (edited messages)
-
-    Ignores:
-    - Bot messages
-    - Message deletions
-    - Reaction events (for now)
-    """
-
-    def __init__(self, signing_secret: str = ""):
-        """
-        Initialize Slack handler.
-
-        Args:
-            signing_secret: Slack signing secret for verification
-        """
-        super().__init__("slack")
-        self._signing_secret = signing_secret
-
-    async def parse_event(self, raw_data: Dict[str, Any]) -> Optional[Message]:
-        """
-        Parse Slack event into Message.
-
-        Args:
-            raw_data: Raw Slack event data
-
-        Returns:
-            Message object or None if event should be ignored
-        """
-        # Handle URL verification challenge
-        if raw_data.get("type") == "url_verification":
-            return None
-
-        # Handle event callback
-        if raw_data.get("type") != "event_callback":
-            return None
-
-        event = raw_data.get("event", {})
-        event_type = event.get("type", "")
-
-        # Handle message events
-        if event_type == "message":
-            if event.get("subtype") == "message_changed":
-                return self._parse_message_changed_event(event, raw_data)
-            return self._parse_message_event(event, raw_data)
-
-        return None
-
-    def _parse_message_event(
-        self,
-        event: Dict[str, Any],
-        raw_data: Dict[str, Any]
-    ) -> Optional[Message]:
-        """Parse a standard message event"""
-        # Skip bot messages
-        if event.get("bot_id") or event.get("subtype") == "bot_message":
-            return None
-
-        # Skip message subtypes we don't care about
-        ignored_subtypes = [
-            "channel_join", "channel_leave", "channel_topic",
-            "channel_purpose", "channel_name", "message_deleted",
-            "file_share", "thread_broadcast",
-        ]
-        if event.get("subtype") in ignored_subtypes:
-            return None
-
-        text = event.get("text", "")
-        user = event.get("user", "")
-        channel = event.get("channel", "")
-        ts = event.get("ts", "")
-        thread_ts = event.get("thread_ts")
-
-        # Extract mentions
-        mentions = self._extract_mentions(text)
-
-        # Extract reactions (if available)
-        reactions = event.get("reactions", [])
-
-        # Build URL if team info available
-        url = None
-        team_id = raw_data.get("team_id")
-        if team_id and channel and ts:
-            url = f"https://slack.com/archives/{channel}/p{ts.replace('.', '')}"
-
-        return Message(
-            text=text,
-            user=user,
-            channel=channel,
-            source="slack",
-            timestamp=ts,
-            thread_ts=thread_ts,
-            url=url,
-            is_bot=False,
-            mentions=mentions,
-            reactions=reactions,
-            raw_data=event,
-        )
-
-    def _parse_message_changed_event(
-        self,
-        event: Dict[str, Any],
-        raw_data: Dict[str, Any]
-    ) -> Optional[Message]:
-        """Parse a message_changed event"""
-        message = event.get("message", {})
-
-        # Skip bot messages
-        if message.get("bot_id"):
-            return None
-
-        return Message(
-            text=message.get("text", ""),
-            user=message.get("user", ""),
-            channel=event.get("channel", ""),
-            source="slack",
-            timestamp=message.get("ts", ""),
-            thread_ts=message.get("thread_ts"),
-            is_bot=False,
-            mentions=self._extract_mentions(message.get("text", "")),
-            raw_data=event,
-        )
-
-    def _extract_mentions(self, text: str) -> List[str]:
-        """Extract user mentions from text"""
-        import re
-        # Slack mentions format: <@U12345678>
-        matches = re.findall(r'<@(U[A-Z0-9]+)>', text)
-        return matches
-
-    def verify_signature(
-        self,
-        body: bytes,
-        signature: str,
-        timestamp: str
-    ) -> bool:
-        """
-        Verify Slack request signature.
-
-        Args:
-            body: Raw request body
-            signature: X-Slack-Signature header
-            timestamp: X-Slack-Request-Timestamp header
-
-        Returns:
-            True if signature is valid
-        """
-        if not self._signing_secret:
-            # Skip verification if no secret configured
-            return True
-
-        if not signature or not timestamp:
-            return False
-
-        # Check timestamp is recent (within 5 minutes)
-        try:
-            ts = int(timestamp)
-            if abs(time.time() - ts) > 300:
-                return False
-        except ValueError:
-            return False
-
-        # Compute expected signature
-        sig_basestring = f"v0:{timestamp}:{body.decode('utf-8')}"
-        expected_sig = "v0=" + hmac.new(
-            self._signing_secret.encode('utf-8'),
-            sig_basestring.encode('utf-8'),
-            hashlib.sha256
-        ).hexdigest()
-
-        # Compare signatures
-        return hmac.compare_digest(expected_sig, signature)
-
-    def should_process(self, message: Message) -> bool:
-        """
-        Check if Slack message should be processed.
-
-        Additional Slack-specific filtering.
-        """
-        if not super().should_process(message):
-            return False
-
-        # Skip messages that are just mentions or links
-        text = message.text.strip()
-
-        # Skip if mostly mentions
-        import re
-        clean_text = re.sub(r'<@U[A-Z0-9]+>', '', text).strip()
-        if len(clean_text) < 15:
-            return False
-
-        # Skip if mostly URLs
-        clean_text = re.sub(r'<https?://[^>]+>', '', clean_text).strip()
-        if len(clean_text) < 15:
-            return False
-
-        return True
-
-    def format_channel_name(self, channel_id: str, channel_name: str = None) -> str:
-        """Format channel for display"""
-        if channel_name:
-            return f"#{channel_name}"
-        return f"#{channel_id}"
-
-    def is_thread_reply(self, message: Message) -> bool:
-        """Check if message is a thread reply"""
-        return message.thread_ts is not None and message.thread_ts != message.timestamp
-
-    def is_url_verification(self, raw_data: Dict[str, Any]) -> bool:
-        """Check if request is URL verification"""
-        return raw_data.get("type") == "url_verification"
-
-    def get_challenge(self, raw_data: Dict[str, Any]) -> Optional[str]:
-        """Get challenge for URL verification"""
-        if self.is_url_verification(raw_data):
-            return raw_data.get("challenge")
-        return None
diff --git a/agents/scribe/llm_extractor.py b/agents/scribe/llm_extractor.py
deleted file mode 100644
index edbde7e..0000000
--- a/agents/scribe/llm_extractor.py
+++ /dev/null
@@ -1,421 +0,0 @@
-"""
-LLM-based Field Extractor
-
-Extracts structured decision record fields from non-English text using LLM.
-All outputs are translated to English for embedding consistency.
-
-Supports phase-aware extraction: long reasoning processes (>800 chars) are
-automatically split into logical phases, each becoming a linked DecisionRecord.
-"""
-
-import json
-import logging
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-from ..common.llm_client import LLMClient
-from ..common.llm_utils import parse_llm_json
-
-logger = logging.getLogger("rune.scribe.llm_extractor")
-
-# Texts longer than this threshold trigger multi-phase extraction
-PHASE_SPLIT_THRESHOLD = 800
-
-# Texts longer than this, or with many detail items, trigger bundle split
-BUNDLE_SPLIT_THRESHOLD = 1500
-
-
-@dataclass
-class ExtractedFields:
-    """Fields extracted by LLM from non-English text"""
-    title: str = ""
-    rationale: str = ""
-    problem: str = ""
-    alternatives: List[str] = field(default_factory=list)
-    trade_offs: List[str] = field(default_factory=list)
-    status_hint: str = ""       # "proposed" | "accepted" | "rejected"
-    tags: List[str] = field(default_factory=list)
-
-
-@dataclass
-class PhaseExtractedFields:
-    """Fields for a single phase within a multi-phase reasoning chain"""
-    phase_title: str = ""
-    phase_decision: str = ""
-    phase_rationale: str = ""
-    phase_problem: str = ""
-    alternatives: List[str] = field(default_factory=list)
-    trade_offs: List[str] = field(default_factory=list)
-    tags: List[str] = field(default_factory=list)
-
-
-@dataclass
-class ExtractionResult:
-    """Result of LLM extraction — may be single, multi-phase, or bundle"""
-    group_title: str = ""
-    group_type: str = ""  # "phase_chain", "bundle", or "" (single)
-    group_summary: str = ""  # 1-line semantic anchor shared across all phases
-    status_hint: str = ""
-    tags: List[str] = field(default_factory=list)
-    confidence: Optional[float] = None  # Agent-provided confidence (0.0-1.0)
-    single: Optional[ExtractedFields] = None
-    phases: Optional[List[PhaseExtractedFields]] = None
-
-    @property
-    def is_multi_phase(self) -> bool:
-        return self.phases is not None and len(self.phases) > 1
-
-    @property
-    def is_bundle(self) -> bool:
-        return self.group_type == "bundle" and self.phases is not None and len(self.phases) > 1
-
-
-EXTRACTION_PROMPT = """You are a structured information extractor for organizational decision records.
-
-Given a message (which may be in any language), extract the following fields.
-IMPORTANT: All output values MUST be in English (translate if needed).
-
-Respond with a valid JSON object with these keys:
-- "title": A short title for the decision (5-60 chars, in English)
-- "rationale": The reasoning behind the decision (in English, empty string if not found)
-- "problem": The problem being solved (in English, empty string if not found)
-- "alternatives": List of alternatives considered (in English, empty list if none)
-- "trade_offs": List of trade-offs mentioned (in English, empty list if none)
-- "status_hint": One of "proposed", "accepted", "rejected" based on the tone/language
-- "tags": List of relevant topic tags (in English, e.g. ["database", "migration"])
-
-Rules:
-- Translate ALL values to English
-- Keep the title concise and descriptive
-- If a field is not clearly present in the text, use empty string or empty list
-- For status_hint: use "accepted" if the message indicates a finalized decision, "proposed" if tentative, "rejected" if something was decided against
-
-Message to extract from:
-{text}
-
-JSON:"""
-
-
-PHASE_EXTRACTION_PROMPT = """You are a structured information extractor for organizational decision records.
-
-Given a long message containing a multi-part reasoning process (which may be in any language), split it into LOGICAL PHASES and extract structured information for each phase.
-
-IMPORTANT:
-- Split by LOGICAL REASONING PHASES, not by paragraph or character count.
-- Each phase should represent a distinct sub-decision, conclusion, or reasoning step.
-- All output values MUST be in English (translate if needed).
-- Aim for 2-5 phases. Do not create more than 7 phases.
-- If the text is actually a single decision (not multi-phase), return a single phase.
-
-Respond with a valid JSON object:
-{{
-    "group_title": "Overall title for the entire reasoning chain (5-60 chars, English)",
-    "status_hint": "proposed" or "accepted" or "rejected",
-    "tags": ["relevant", "topic", "tags"],
-    "phases": [
-        {{
-            "phase_title": "Short title for this phase (e.g., 'Target Market Analysis')",
-            "phase_decision": "The key decision or conclusion of this phase",
-            "phase_rationale": "Why this conclusion was reached",
-            "phase_problem": "The sub-problem this phase addresses",
-            "alternatives": ["alternatives considered in this phase"],
-            "trade_offs": ["trade-offs for this phase"],
-            "tags": ["phase-specific tags"]
-        }}
-    ]
-}}
-
-Rules:
-- Translate ALL values to English
-- Each phase_decision should be self-contained and meaningful on its own
-- phase_title should indicate the topic/aspect (e.g., "Positioning Strategy", "Pricing Model", "Go-to-Market Timeline")
-
-Message to extract from:
-{text}
-
-JSON:"""
-
-
-BUNDLE_SPLIT_PROMPT = """You are a structured information extractor for organizational decision records.
-
-Given a message describing a SINGLE decision with rich details (which may be in any language), split it into a CORE record plus DETAIL FACETS. This is NOT about sequential reasoning — it's about organizing the supporting material of one decision.
-
-IMPORTANT:
-- The first item MUST be the "Core Decision" — a concise summary of the main decision.
-- Subsequent items are detail facets: alternatives analysis, trade-offs, implementation plan, rationale deep-dive, etc.
-- All output values MUST be in English (translate if needed).
-- Aim for 2-4 facets total (including core). Do not create more than 5.
-- Each facet should be self-contained and meaningful on its own.
-
-Respond with a valid JSON object:
-{{
-    "group_title": "Overall title for the decision (5-60 chars, English)",
-    "status_hint": "proposed" or "accepted" or "rejected",
-    "tags": ["relevant", "topic", "tags"],
-    "phases": [
-        {{
-            "phase_title": "Core Decision",
-            "phase_decision": "The main decision statement — concise",
-            "phase_rationale": "Brief summary of why",
-            "phase_problem": "The problem being solved",
-            "alternatives": [],
-            "trade_offs": [],
-            "tags": []
-        }},
-        {{
-            "phase_title": "Alternatives Analysis",
-            "phase_decision": "Detailed comparison of alternatives considered",
-            "phase_rationale": "Why the chosen option was selected over others",
-            "phase_problem": "",
-            "alternatives": ["alt1", "alt2", "alt3"],
-            "trade_offs": ["trade-off for each"],
-            "tags": []
-        }}
-    ]
-}}
-
-Rules:
-- Translate ALL values to English
-- First facet is always "Core Decision" with the essential what/why
-- Other facets organize the supporting detail (alternatives, trade-offs, implementation, evidence, etc.)
-- Each phase_decision should be self-contained — readable without other facets
-
-Message to extract from:
-{text}
-
-JSON:"""
-
-
-class LLMExtractor:
-    """Extracts structured fields from text using Claude API.
-
-    Supports phase-aware extraction for long reasoning chains.
-    """
-
-    def __init__(
-        self,
-        llm_provider: str = "anthropic",
-        anthropic_api_key: Optional[str] = None,
-        openai_api_key: Optional[str] = None,
-        google_api_key: Optional[str] = None,
-        model: str = "claude-sonnet-4-20250514",
-    ):
-        self._provider = llm_provider
-        self._model = model
-        self._llm = LLMClient(
-            provider=llm_provider,
-            model=model,
-            anthropic_api_key=anthropic_api_key,
-            openai_api_key=openai_api_key,
-            google_api_key=google_api_key,
-        )
-
-    @property
-    def is_available(self) -> bool:
-        """Check if LLM client is ready"""
-        return self._llm.is_available
-
-    def _generate(self, prompt: str, max_tokens: int) -> str:
-        return self._llm.generate(prompt, max_tokens=max_tokens)
-
-    def extract(self, text: str) -> ExtractionResult:
-        """Extract structured fields, auto-detecting split strategy.
-
-        Split strategy:
-        - Short text (<=800 chars): single extraction, then bundle check
-        - Long text (>800 chars): phase extraction first, bundle fallback
-
-        Args:
-            text: Input text (any language)
-
-        Returns:
-            ExtractionResult (check .is_multi_phase or .is_bundle)
-        """
-        if not self.is_available:
-            return ExtractionResult(single=ExtractedFields())
-
-        if len(text) <= PHASE_SPLIT_THRESHOLD:
-            fields = self._extract_single(text)
-            result = ExtractionResult(
-                group_title=fields.title,
-                status_hint=fields.status_hint,
-                tags=fields.tags,
-                single=fields,
-            )
-            # Check if even short text has overflow details
-            if self._needs_bundle_split(text, fields):
-                try:
-                    return self._extract_bundle(text)
-                except Exception as e:
-                    logger.warning("Bundle extraction failed for short text: %s", e)
-            return result
-
-        # Long text: try phase extraction first
-        try:
-            result = self._extract_phases(text)
-            if result.is_multi_phase:
-                result.group_type = "phase_chain"
-                return result
-            # Phase returned single — check if bundle needed
-            if self._needs_bundle_split(text, result.single):
-                try:
-                    return self._extract_bundle(text)
-                except Exception as e:
-                    logger.warning("Bundle extraction failed after phase: %s", e)
-            return result
-        except Exception as e:
-            logger.warning("Phase extraction failed: %s", e)
-            # Try bundle before falling back to single
-            if len(text) > BUNDLE_SPLIT_THRESHOLD:
-                try:
-                    return self._extract_bundle(text)
-                except Exception as e2:
-                    logger.warning("Bundle extraction also failed: %s", e2)
-            fields = self._extract_single(text)
-            return ExtractionResult(
-                group_title=fields.title,
-                status_hint=fields.status_hint,
-                tags=fields.tags,
-                single=fields,
-            )
-
-    def extract_single(self, text: str) -> ExtractedFields:
-        """Extract as single record (backward-compatible entry point)."""
-        if not self.is_available:
-            return ExtractedFields()
-        return self._extract_single(text)
-
-    def _extract_single(self, text: str) -> ExtractedFields:
-        """Single-phase extraction (original logic)."""
-        try:
-            prompt = EXTRACTION_PROMPT.format(text=text)
-            raw = self._generate(prompt, max_tokens=512)
-            return self._parse_single_response(raw)
-        except Exception as e:
-            logger.warning("Single extraction failed: %s", e)
-            return ExtractedFields()
-
-    def _extract_phases(self, text: str) -> ExtractionResult:
-        """Multi-phase extraction for long reasoning chains."""
-        prompt = PHASE_EXTRACTION_PROMPT.format(text=text)
-        raw = self._generate(prompt, max_tokens=2048)
-        data = parse_llm_json(raw)
-
-        phases_data = data.get("phases", [])
-        group_title = str(data.get("group_title", ""))[:60]
-        status_hint = str(data.get("status_hint", "")).lower()
-        tags = [str(t).lower() for t in data.get("tags", []) if t]
-
-        # If LLM returned 0 or 1 phase, treat as single
-        if len(phases_data) <= 1:
-            p = phases_data[0] if phases_data else {}
-            return ExtractionResult(
-                group_title=group_title,
-                status_hint=status_hint,
-                tags=tags,
-                single=ExtractedFields(
-                    title=str(p.get("phase_title", group_title))[:60],
-                    rationale=str(p.get("phase_rationale", "")),
-                    problem=str(p.get("phase_problem", "")),
-                    alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                    status_hint=status_hint,
-                    tags=tags,
-                ),
-            )
-
-        # Multi-phase
-        phases = []
-        for p in phases_data[:7]:  # cap at 7
-            phases.append(PhaseExtractedFields(
-                phase_title=str(p.get("phase_title", ""))[:60],
-                phase_decision=str(p.get("phase_decision", "")),
-                phase_rationale=str(p.get("phase_rationale", "")),
-                phase_problem=str(p.get("phase_problem", "")),
-                alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                tags=[str(t).lower() for t in p.get("tags", []) if t],
-            ))
-
-        return ExtractionResult(
-            group_title=group_title,
-            status_hint=status_hint,
-            tags=tags,
-            phases=phases,
-        )
-
-    def _needs_bundle_split(self, text: str, fields: Optional[ExtractedFields]) -> bool:
-        """Check if content has detail overflow that warrants bundle splitting."""
-        # Long text that exceeds single record capacity
-        if len(text) > BUNDLE_SPLIT_THRESHOLD:
-            return True
-        # Moderate text with many detail items in multiple categories
-        if fields and len(fields.alternatives) > 3 and len(fields.trade_offs) > 3:
-            return True
-        return False
-
-    def _extract_bundle(self, text: str) -> ExtractionResult:
-        """Bundle extraction: split a single decision into detail facets."""
-        prompt = BUNDLE_SPLIT_PROMPT.format(text=text)
-        raw = self._generate(prompt, max_tokens=2048)
-        data = parse_llm_json(raw)
-
-        phases_data = data.get("phases", [])
-        group_title = str(data.get("group_title", ""))[:60]
-        status_hint = str(data.get("status_hint", "")).lower()
-        tags = [str(t).lower() for t in data.get("tags", []) if t]
-
-        # If LLM returned 0 or 1 facet, not a real bundle
-        if len(phases_data) <= 1:
-            p = phases_data[0] if phases_data else {}
-            return ExtractionResult(
-                group_title=group_title,
-                status_hint=status_hint,
-                tags=tags,
-                single=ExtractedFields(
-                    title=str(p.get("phase_title", group_title))[:60],
-                    rationale=str(p.get("phase_rationale", "")),
-                    problem=str(p.get("phase_problem", "")),
-                    alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                    status_hint=status_hint,
-                    tags=tags,
-                ),
-            )
-
-        # Multi-facet bundle — reuse PhaseExtractedFields structure
-        phases = []
-        for p in phases_data[:5]:  # cap at 5 facets
-            phases.append(PhaseExtractedFields(
-                phase_title=str(p.get("phase_title", ""))[:60],
-                phase_decision=str(p.get("phase_decision", "")),
-                phase_rationale=str(p.get("phase_rationale", "")),
-                phase_problem=str(p.get("phase_problem", "")),
-                alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                tags=[str(t).lower() for t in p.get("tags", []) if t],
-            ))
-
-        return ExtractionResult(
-            group_title=group_title,
-            group_type="bundle",
-            status_hint=status_hint,
-            tags=tags,
-            phases=phases,
-        )
-
-    def _parse_single_response(self, raw: str) -> ExtractedFields:
-        """Parse LLM JSON response into ExtractedFields."""
-        data = parse_llm_json(raw)
-        if not data:
-            return ExtractedFields()
-
-        return ExtractedFields(
-            title=str(data.get("title", ""))[:60],
-            rationale=str(data.get("rationale", "")),
-            problem=str(data.get("problem", "")),
-            alternatives=[str(a) for a in data.get("alternatives", []) if a],
-            trade_offs=[str(t) for t in data.get("trade_offs", []) if t],
-            status_hint=str(data.get("status_hint", "")).lower(),
-            tags=[str(t).lower() for t in data.get("tags", []) if t],
-        )
diff --git a/agents/scribe/pattern_parser.py b/agents/scribe/pattern_parser.py
deleted file mode 100644
index 3ccd14c..0000000
--- a/agents/scribe/pattern_parser.py
+++ /dev/null
@@ -1,423 +0,0 @@
-"""
-Pattern Parser
-
-Parses trigger patterns from patterns/capture-triggers.md.
-Extracts phrases organized by category and priority.
-"""
-
-import re
-from pathlib import Path
-from typing import List, Dict, Optional
-
-
-# Domain mapping from category names
-CATEGORY_TO_DOMAIN = {
-    # Architecture & Engineering
-    "architecture": "architecture",
-    "technical": "architecture",
-    "performance": "architecture",
-    "optimization": "architecture",
-    "technical_debt": "architecture",
-    "migration": "architecture",
-    "infrastructure": "architecture",
-    "api": "architecture",
-    "database": "architecture",
-    # Security & Compliance
-    "security": "security",
-    "compliance": "security",
-    "encryption": "security",
-    "authentication": "security",
-    "authorization": "security",
-    "vulnerability": "security",
-    # Product & Business
-    "product": "product",
-    "business": "product",
-    "feature": "product",
-    "roadmap": "product",
-    "mvp": "product",
-    "startup": "product",
-    # Executive & Strategic
-    "executive": "exec",
-    "strategic": "exec",
-    "funding": "exec",
-    "board": "exec",
-    "leadership": "exec",
-    # Operations & Deployment
-    "ops": "ops",
-    "operations": "ops",
-    "deployment": "ops",
-    "monitoring": "ops",
-    "observability": "ops",
-    "sre": "ops",
-    "oncall": "ops",
-    "on_call": "ops",
-    "runbook": "ops",
-    # Design & UX
-    "design": "design",
-    "ux": "design",
-    "ui": "design",
-    "accessibility": "design",
-    "design_system": "design",
-    # Data & Analytics
-    "data": "data",
-    "analytics": "data",
-    "ml": "data",
-    "machine_learning": "data",
-    "statistics": "data",
-    # HR & People
-    "hr": "hr",
-    "hiring": "hr",
-    "people": "hr",
-    "onboarding": "hr",
-    "culture": "hr",
-    "compensation": "hr",
-    # Marketing & Growth
-    "marketing": "marketing",
-    "growth": "marketing",
-    "campaign": "marketing",
-    "brand": "marketing",
-    # Incident & Postmortem
-    "incident": "incident",
-    "incident_response": "incident",
-    "outage": "incident",
-    "postmortem": "incident",
-    "post_mortem": "incident",
-    "failure_retro": "incident",
-    "failure_retrospective": "incident",
-    "outage_communication": "incident",
-    "rca": "incident",
-    "root_cause": "incident",
-    # Debugging & Troubleshooting
-    "debugging": "debugging",
-    "troubleshooting": "debugging",
-    "bug_fix": "debugging",
-    "performance_investigation": "debugging",
-    "regression": "debugging",
-    "hotfix": "debugging",
-    # QA & Testing
-    "qa": "qa",
-    "testing": "qa",
-    "quality_assurance": "qa",
-    "bug_triage": "qa",
-    "test_strategy": "qa",
-    # Legal & Compliance
-    "legal": "legal",
-    "regulatory": "legal",
-    "contract": "legal",
-    "ip": "legal",
-    "patent": "legal",
-    "privacy": "legal",
-    # Finance & Budget
-    "finance": "finance",
-    "budget": "finance",
-    "cost": "finance",
-    "pricing": "finance",
-    "revenue": "finance",
-    # Sales & Partnerships
-    "sales": "sales",
-    "partnership": "sales",
-    "deal": "sales",
-    "enterprise_sales": "sales",
-    # Customer Success
-    "customer_success": "customer_success",
-    "customer_escalation": "customer_escalation",
-    "churn": "customer_success",
-    "retention": "customer_success",
-    "support": "customer_success",
-    # Research & R&D
-    "research": "research",
-    "rnd": "research",
-    "r_and_d": "research",
-    "experiment": "research",
-    "prototype": "research",
-    "poc": "research",
-    # Risk Assessment
-    "risk": "risk",
-    "risk_assessment": "risk",
-    "mitigation": "risk",
-    "contingency": "risk",
-    # Cross-team & Process
-    "cross_team": "ops",
-    "process": "ops",
-    "coordination": "ops",
-}
-
-
-def _normalize_category(raw_category: str) -> str:
-    """Normalize category name to lowercase with underscores"""
-    # Remove special characters, convert to lowercase
-    normalized = re.sub(r'[^a-zA-Z0-9\s]', '', raw_category.lower())
-    normalized = re.sub(r'\s+', '_', normalized.strip())
-    return normalized
-
-
-def _infer_domain(category: str) -> str:
-    """Infer domain from category name"""
-    category_lower = category.lower()
-
-    # Check direct mapping
-    for key, domain in CATEGORY_TO_DOMAIN.items():
-        if key in category_lower:
-            return domain
-
-    return "general"
-
-
-def _detect_priority(line: str, section_context: str) -> str:
-    """Detect priority from line content and section context"""
-    line_lower = line.lower()
-    section_lower = section_context.lower()
-
-    # High priority indicators
-    high_indicators = [
-        "high_confidence", "high-confidence", "high priority",
-        "always capture", "critical", "must capture",
-        "explicit decision", "trade-off", "security", "compliance"
-    ]
-
-    # Medium priority indicators
-    medium_indicators = [
-        "medium_confidence", "medium-confidence", "medium priority",
-        "usually capture", "context-dependent"
-    ]
-
-    # Check section context first
-    for indicator in high_indicators:
-        if indicator in section_lower:
-            return "high"
-
-    for indicator in medium_indicators:
-        if indicator in section_lower:
-            return "medium"
-
-    # Check line content
-    for indicator in high_indicators:
-        if indicator in line_lower:
-            return "high"
-
-    # Default to medium
-    return "medium"
-
-
-def parse_capture_triggers(md_path: str) -> List[Dict]:
-    """
-    Parse capture-triggers.md into structured pattern list.
-
-    Args:
-        md_path: Path to capture-triggers.md file
-
-    Returns:
-        List of dicts with keys:
-            - text: Pattern text
-            - category: Category name
-            - priority: "high", "medium", or "low"
-            - domain: Domain classification
-
-    Example:
-        [
-            {
-                "text": "We decided to use X instead of Y because...",
-                "category": "architecture_technical_decisions",
-                "priority": "high",
-                "domain": "architecture"
-            },
-            ...
-        ]
-    """
-    path = Path(md_path)
-    if not path.exists():
-        raise FileNotFoundError(f"Pattern file not found: {md_path}")
-
-    content = path.read_text(encoding='utf-8')
-    patterns = []
-
-    current_category = "general"
-    current_section = ""
-    current_priority = "medium"
-
-    lines = content.split('\n')
-
-    for line in lines:
-        line_stripped = line.strip()
-
-        # Skip empty lines and comments
-        if not line_stripped or line_stripped.startswith('<!--'):
-            continue
-
-        # Detect category headers (## Category Name)
-        if line_stripped.startswith('## '):
-            current_category = _normalize_category(line_stripped[3:])
-            current_section = line_stripped
-            continue
-
-        # Detect subsection headers (### Subsection)
-        if line_stripped.startswith('### '):
-            current_section = line_stripped
-            # Update priority based on section name
-            current_priority = _detect_priority(line_stripped, current_section)
-            continue
-
-        # Detect priority markers in content
-        if 'HIGH_CONFIDENCE' in line_stripped or 'High-Priority' in line_stripped:
-            current_priority = "high"
-            continue
-        if 'MEDIUM_CONFIDENCE' in line_stripped or 'Medium-Priority' in line_stripped:
-            current_priority = "medium"
-            continue
-
-        # Extract quoted patterns: - "pattern text..."
-        quote_match = re.match(r'^[-*]\s*["\']([^"\']+)["\']', line_stripped)
-        if quote_match:
-            pattern_text = quote_match.group(1).strip()
-            if len(pattern_text) >= 5:  # Skip very short patterns
-                patterns.append({
-                    "text": pattern_text,
-                    "category": current_category,
-                    "priority": current_priority,
-                    "domain": _infer_domain(current_category),
-                })
-            continue
-
-        # Extract patterns in code blocks or with specific markers
-        # Pattern: `pattern text`
-        backtick_match = re.match(r'^[-*]?\s*`([^`]+)`', line_stripped)
-        if backtick_match:
-            pattern_text = backtick_match.group(1).strip()
-            if len(pattern_text) >= 5:
-                patterns.append({
-                    "text": pattern_text,
-                    "category": current_category,
-                    "priority": current_priority,
-                    "domain": _infer_domain(current_category),
-                })
-            continue
-
-        # Extract list patterns that look like trigger phrases
-        # Pattern: - We decided to... / - Let's go with...
-        list_match = re.match(r'^[-*]\s+([A-Z][^.!?\n]{10,})', line_stripped)
-        if list_match:
-            text = list_match.group(1).strip()
-            # Skip if it looks like a description rather than a pattern
-            if not text.endswith(':') and not text.startswith('Example'):
-                # Check for trigger phrase indicators
-                trigger_indicators = [
-                    'we decided', 'let\'s go', 'chose', 'decision',
-                    'trade-off', 'because', 'rationale', 'reason',
-                    'policy', 'requirement', 'must', 'should',
-                ]
-                text_lower = text.lower()
-                if any(ind in text_lower for ind in trigger_indicators):
-                    patterns.append({
-                        "text": text,
-                        "category": current_category,
-                        "priority": current_priority,
-                        "domain": _infer_domain(current_category),
-                    })
-
-    # Remove duplicates while preserving order
-    seen = set()
-    unique_patterns = []
-    for p in patterns:
-        key = p["text"].lower()
-        if key not in seen:
-            seen.add(key)
-            unique_patterns.append(p)
-
-    return unique_patterns
-
-
-def load_default_patterns() -> List[Dict]:
-    """
-    Load patterns from the default capture-triggers.md location.
-
-    Returns:
-        List of pattern dicts
-    """
-    # Find patterns directory relative to this file
-    current_dir = Path(__file__).parent
-    patterns_dir = current_dir.parent.parent / "patterns"
-    default_path = patterns_dir / "capture-triggers.md"
-
-    if not default_path.exists():
-        print(f"[PatternParser] Warning: Default patterns file not found at {default_path}")
-        return get_builtin_patterns()
-
-    return parse_capture_triggers(str(default_path))
-
-
-def load_all_language_patterns() -> List[Dict]:
-    """Load patterns from all language-specific capture-triggers files.
-
-    Discovers capture-triggers.*.md files in the patterns/ directory
-    and merges them with the base English patterns.
-
-    Returns:
-        List of pattern dicts, each with an optional 'language' key
-    """
-    current_dir = Path(__file__).parent
-    patterns_dir = current_dir.parent.parent / "patterns"
-    all_patterns = []
-
-    # English base patterns
-    base = patterns_dir / "capture-triggers.md"
-    if base.exists():
-        base_patterns = parse_capture_triggers(str(base))
-        for p in base_patterns:
-            p["language"] = "en"
-        all_patterns.extend(base_patterns)
-
-    # Language-specific patterns (capture-triggers.ko.md, capture-triggers.ja.md, ...)
-    for lang_file in sorted(patterns_dir.glob("capture-triggers.*.md")):
-        lang_code = lang_file.stem.split(".")[-1]
-        try:
-            lang_patterns = parse_capture_triggers(str(lang_file))
-            for p in lang_patterns:
-                p["language"] = lang_code
-            all_patterns.extend(lang_patterns)
-            print(f"[PatternParser] Loaded {len(lang_patterns)} patterns for '{lang_code}'")
-        except Exception as e:
-            print(f"[PatternParser] Warning: Failed to load {lang_file}: {e}")
-
-    return all_patterns or get_builtin_patterns()
-
-
-def get_builtin_patterns() -> List[Dict]:
-    """
-    Return built-in fallback patterns when file is not available.
-
-    These are the core patterns that should always be available.
-    """
-    return [
-        # Architecture decisions
-        {"text": "We decided to use", "category": "architecture", "priority": "high", "domain": "architecture"},
-        {"text": "We chose X over Y because", "category": "architecture", "priority": "high", "domain": "architecture"},
-        {"text": "Let's go with", "category": "architecture", "priority": "high", "domain": "architecture"},
-        {"text": "The trade-off is", "category": "architecture", "priority": "high", "domain": "architecture"},
-        {"text": "Design decision:", "category": "architecture", "priority": "high", "domain": "architecture"},
-        {"text": "Architecture decision:", "category": "architecture", "priority": "high", "domain": "architecture"},
-
-        # Security decisions
-        {"text": "Security-wise, we should", "category": "security", "priority": "high", "domain": "security"},
-        {"text": "For compliance, we need", "category": "security", "priority": "high", "domain": "security"},
-        {"text": "The encryption strategy is", "category": "security", "priority": "high", "domain": "security"},
-
-        # Product decisions
-        {"text": "We're prioritizing", "category": "product", "priority": "high", "domain": "product"},
-        {"text": "Feature rejected because", "category": "product", "priority": "high", "domain": "product"},
-        {"text": "Customer feedback shows", "category": "product", "priority": "medium", "domain": "product"},
-
-        # General decisions
-        {"text": "The reason we", "category": "general", "priority": "high", "domain": "general"},
-        {"text": "After discussion, we", "category": "general", "priority": "medium", "domain": "general"},
-        {"text": "The team agreed", "category": "general", "priority": "medium", "domain": "general"},
-        {"text": "Consensus:", "category": "general", "priority": "high", "domain": "general"},
-        {"text": "Final decision:", "category": "general", "priority": "high", "domain": "general"},
-
-        # Performance
-        {"text": "Performance bottleneck identified:", "category": "performance", "priority": "high", "domain": "architecture"},
-        {"text": "This doesn't scale because", "category": "performance", "priority": "high", "domain": "architecture"},
-
-        # Technical debt
-        {"text": "Technical debt—adding to backlog", "category": "technical_debt", "priority": "medium", "domain": "architecture"},
-        {"text": "We can refactor this later", "category": "technical_debt", "priority": "medium", "domain": "architecture"},
-    ]
diff --git a/agents/scribe/record_builder.py b/agents/scribe/record_builder.py
deleted file mode 100644
index c2b5ad5..0000000
--- a/agents/scribe/record_builder.py
+++ /dev/null
@@ -1,703 +0,0 @@
-"""
-Record Builder
-
-Builds Decision Records from raw events and detection results.
-Core component of the Scribe agent's Stage 2 pipeline.
-
-Key Rules:
-- Evidence without quotes → certainty = "unknown"
-- No evidence → status = "proposed"
-- Always generate payload.text for embedding
-"""
-
-import re
-from datetime import datetime, timezone
-from typing import Optional, List, Dict, Any
-from dataclasses import dataclass
-
-from ..common.schemas import (
-    DecisionRecord,
-    DecisionDetail,
-    Context,
-    Why,
-    Evidence,
-    SourceRef,
-    Quality,
-    Payload,
-    Domain,
-    Sensitivity,
-    Status,
-    Certainty,
-    ReviewState,
-    SourceType,
-    generate_record_id,
-    generate_group_id,
-)
-from ..common.schemas.templates import render_payload_text
-from ..common.language import LanguageInfo
-from .detector import DetectionResult
-from .llm_extractor import LLMExtractor, ExtractionResult
-
-
-@dataclass
-class RawEvent:
-    """Raw event from a source (Slack, GitHub, etc.)"""
-    text: str
-    user: str
-    channel: str
-    timestamp: str
-    source: str  # "slack", "github", "notion", etc.
-    thread_ts: Optional[str] = None
-    url: Optional[str] = None
-    additional_context: Optional[Dict[str, Any]] = None
-
-
-class RecordBuilder:
-    """
-    Builds Decision Records from raw events.
-
-    Pipeline:
-    1. Extract decision details from text
-    2. Extract evidence (quotes) from text
-    3. Determine certainty based on evidence
-    4. Generate payload.text for embedding
-
-    Rules enforced:
-    - certainty cannot be "supported" without evidence quotes
-    - status is "proposed" if no evidence
-    - PII/credentials are redacted
-    """
-
-    # Patterns for extracting quotes from text
-    QUOTE_PATTERNS = [
-        r'"([^"]{10,})"',  # Double quotes
-        r"'([^']{10,})'",  # Single quotes
-        r'「([^」]{10,})」',  # Japanese quotes
-        r'«([^»]{10,})»',  # French quotes
-    ]
-
-    # Patterns for extracting rationale
-    RATIONALE_PATTERNS = [
-        r'because\s+(.{10,}?)(?:\.|$)',
-        r'reason(?:ing)?(?:\s+is)?[:\s]+(.{10,}?)(?:\.|$)',
-        r'rationale[:\s]+(.{10,}?)(?:\.|$)',
-        r'since\s+(.{10,}?)(?:\.|$)',
-        r'due to\s+(.{10,}?)(?:\.|$)',
-    ]
-
-    # Patterns for sensitive data to redact
-    SENSITIVE_PATTERNS = [
-        (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'),  # Email
-        (r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[PHONE]'),  # Phone
-        (r'\b(?:sk|pk|api|key|token|secret|password)[_-][a-zA-Z0-9_-]{15,}\b', '[API_KEY]'),  # API keys with prefix
-        (r'\b[A-Za-z0-9]{32,}\b', '[API_KEY]'),  # Long alphanumeric tokens (32+ chars)
-        (r'\b[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}\b', '[CARD]'),  # Credit card
-    ]
-
-    def __init__(
-        self,
-        default_sensitivity: Sensitivity = Sensitivity.INTERNAL,
-        llm_extractor: Optional[LLMExtractor] = None,
-    ):
-        """
-        Initialize record builder.
-
-        Args:
-            default_sensitivity: Default sensitivity when unclear
-            llm_extractor: Optional LLM extractor for non-English text
-        """
-        self._default_sensitivity = default_sensitivity
-        self._llm_extractor = llm_extractor
-
-    def build(
-        self,
-        raw_event: RawEvent,
-        detection: DetectionResult,
-        language: Optional[LanguageInfo] = None,
-    ) -> DecisionRecord:
-        """
-        Build a Decision Record from raw event and detection result.
-
-        Args:
-            raw_event: Raw event data
-            detection: Detection result from DecisionDetector
-            language: Optional detected language info
-
-        Returns:
-            Complete DecisionRecord with payload.text
-        """
-        # Redact sensitive data
-        clean_text, redaction_notes = self._redact_sensitive(raw_event.text)
-
-        if self._llm_extractor and self._llm_extractor.is_available:
-            # ===== LLM extraction (preferred for all languages) =====
-            # Robust to typos, abbreviations, colloquialisms
-            extracted = self._llm_extractor.extract_single(clean_text)
-            title = extracted.title or self._extract_title(clean_text, detection)
-            rationale = extracted.rationale
-            problem = extracted.problem
-            alternatives = extracted.alternatives
-            trade_offs = extracted.trade_offs
-            tags = extracted.tags
-            evidence = self._extract_evidence(raw_event, clean_text)
-            certainty, missing_info = self._determine_certainty(evidence, rationale)
-            status = self._status_from_hint(extracted.status_hint, evidence, clean_text)
-            decision_detail = self._extract_decision_detail(raw_event, clean_text)
-            context = Context(
-                problem=problem,
-                alternatives=alternatives[:5],
-                trade_offs=trade_offs[:5],
-            )
-        else:
-            # ===== Fallback: regex extraction (when LLM unavailable) =====
-            title = self._extract_title(clean_text, detection)
-            decision_detail = self._extract_decision_detail(raw_event, clean_text)
-            context = self._extract_context(clean_text)
-            evidence = self._extract_evidence(raw_event, clean_text)
-            rationale = self._extract_rationale(clean_text)
-            certainty, missing_info = self._determine_certainty(evidence, rationale)
-            status = self._determine_status(evidence, clean_text)
-            tags = None  # will be extracted below
-
-        # Determine domain
-        domain = self._parse_domain(detection.domain)
-
-        # Generate ID
-        timestamp = datetime.now(timezone.utc)
-        record_id = generate_record_id(timestamp, domain, title)
-
-        # Build the record
-        record = DecisionRecord(
-            id=record_id,
-            domain=domain,
-            sensitivity=self._default_sensitivity,
-            status=status,
-            timestamp=timestamp,
-            title=title,
-            decision=decision_detail,
-            context=context,
-            why=Why(
-                rationale_summary=rationale,
-                certainty=certainty,
-                missing_info=missing_info,
-            ),
-            evidence=evidence,
-            tags=tags if tags is not None else self._extract_tags(clean_text, detection),
-            original_text=raw_event.text,
-            quality=Quality(
-                scribe_confidence=detection.confidence,
-                review_state=ReviewState.UNREVIEWED,
-                review_notes=redaction_notes if redaction_notes else None,
-            ),
-            payload=Payload(format="markdown", text=""),
-        )
-
-        # Ensure consistency
-        record.ensure_evidence_certainty_consistency()
-
-        # Generate payload.text
-        record.payload.text = render_payload_text(record)
-
-        return record
-
-    def build_phases(
-        self,
-        raw_event: RawEvent,
-        detection: DetectionResult,
-        language: Optional[LanguageInfo] = None,
-        pre_extraction: Optional[ExtractionResult] = None,
-    ) -> List[DecisionRecord]:
-        """
-        Build one or more Decision Records, splitting into phases if needed.
-
-        For short texts or when LLM is unavailable, returns a single-element list
-        (delegating to build()). For long reasoning chains, splits into linked
-        phase records sharing a group_id.
-
-        Args:
-            raw_event: Raw event data
-            detection: Detection result from DecisionDetector
-            language: Optional detected language info
-            pre_extraction: Pre-built ExtractionResult from calling agent
-                (agent-delegated mode). When provided, LLMExtractor is skipped entirely.
-
-        Returns:
-            List of DecisionRecords (1 for single, 2-7 for phase chain)
-        """
-        MAX_INPUT_CHARS = 12_000  # ~3k tokens — guard against huge inputs
-        clean_text, redaction_notes = self._redact_sensitive(raw_event.text)
-        clean_text = clean_text[:MAX_INPUT_CHARS]
-
-        if pre_extraction is not None:
-            extraction = pre_extraction
-        elif self._llm_extractor and self._llm_extractor.is_available:
-            # Phase-aware extraction (auto-detects short vs long)
-            extraction = self._llm_extractor.extract(clean_text)
-        else:
-            # Without LLM and no pre_extraction, fall back to single record
-            return [self.build(raw_event, detection, language)]
-
-        if not extraction.is_multi_phase:
-            # Single record — use the single extraction result
-            fields = extraction.single
-            if fields is None:
-                return [self.build(raw_event, detection, language)]
-            return [self._build_single_record_from_extraction(
-                fields=fields,
-                raw_event=raw_event,
-                clean_text=clean_text,
-                detection=detection,
-                extraction=extraction,
-                redaction_notes=redaction_notes,
-                pre_extraction=pre_extraction,
-            )]
-
-        # ===== Multi-record: phase_chain or bundle =====
-        return self._build_multi_record_from_extraction(
-            extraction=extraction,
-            raw_event=raw_event,
-            clean_text=clean_text,
-            detection=detection,
-            redaction_notes=redaction_notes,
-        )
-
-    def _build_single_record_from_extraction(
-        self,
-        fields: Any,
-        raw_event: RawEvent,
-        clean_text: str,
-        detection: DetectionResult,
-        extraction: ExtractionResult,
-        redaction_notes: Optional[str],
-        pre_extraction: Optional[ExtractionResult] = None,
-    ) -> DecisionRecord:
-        title = fields.title or self._extract_title(clean_text, detection)
-        evidence = self._extract_evidence(raw_event, clean_text)
-        certainty, missing_info = self._determine_certainty(evidence, fields.rationale)
-        status = self._status_from_hint(fields.status_hint, evidence, clean_text)
-        domain = self._parse_domain(detection.domain)
-        timestamp = datetime.now(timezone.utc)
-        record_id = generate_record_id(timestamp, domain, title)
-
-        record = DecisionRecord(
-            id=record_id,
-            domain=domain,
-            sensitivity=self._default_sensitivity,
-            status=status,
-            timestamp=timestamp,
-            title=title,
-            decision=self._extract_decision_detail(raw_event, clean_text),
-            context=Context(
-                problem=fields.problem,
-                alternatives=fields.alternatives[:5],
-                trade_offs=fields.trade_offs[:5],
-            ),
-            why=Why(
-                rationale_summary=fields.rationale,
-                certainty=certainty,
-                missing_info=missing_info,
-            ),
-            evidence=evidence,
-            tags=fields.tags or self._extract_tags(clean_text, detection),
-            original_text=raw_event.text,
-            quality=Quality(
-                scribe_confidence=extraction.confidence if extraction.confidence is not None else detection.confidence,
-                review_state=ReviewState.UNREVIEWED,
-                review_notes=redaction_notes if redaction_notes else None,
-            ),
-            payload=Payload(format="markdown", text=""),
-        )
-        record.ensure_evidence_certainty_consistency()
-        record.payload.text = render_payload_text(record)
-
-        # Populate reusable_insight from pre_extraction group_summary
-        if pre_extraction and getattr(pre_extraction, 'group_summary', None):
-            record.reusable_insight = pre_extraction.group_summary
-
-        return record
-
-    def _build_multi_record_from_extraction(
-        self,
-        extraction: ExtractionResult,
-        raw_event: RawEvent,
-        clean_text: str,
-        detection: DetectionResult,
-        redaction_notes: Optional[str],
-    ) -> List[DecisionRecord]:
-        phases = extraction.phases
-        domain = self._parse_domain(detection.domain)
-        timestamp = datetime.now(timezone.utc)
-        group_title = extraction.group_title or self._extract_title(clean_text, detection)
-        group_id = generate_group_id(timestamp, domain, group_title)
-        group_type = extraction.group_type or "phase_chain"
-        phase_total = len(phases)
-
-        records: List[DecisionRecord] = []
-        for seq, phase in enumerate(phases):
-            phase_title = phase.phase_title or f"Phase {seq + 1}"
-            suffix = f"_b{seq}" if group_type == "bundle" else f"_p{seq}"
-            record_id = generate_record_id(timestamp, domain, phase_title) + suffix
-
-            # Parse timestamp for decision detail
-            when = ""
-            if raw_event.timestamp:
-                try:
-                    ts = float(raw_event.timestamp)
-                    when = datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
-                except (ValueError, TypeError):
-                    when = raw_event.timestamp
-
-            decision_detail = DecisionDetail(
-                what=phase.phase_decision[:500],
-                who=[f"user:{raw_event.user}"] if raw_event.user else [],
-                where=f"{raw_event.source}:{raw_event.channel}" if raw_event.channel else raw_event.source,
-                when=when,
-            )
-
-            evidence = self._extract_evidence(raw_event, clean_text)
-            certainty, missing_info = self._determine_certainty(evidence, phase.phase_rationale)
-            status = self._status_from_hint(extraction.status_hint, evidence, clean_text)
-
-            record = DecisionRecord(
-                id=record_id,
-                domain=domain,
-                sensitivity=self._default_sensitivity,
-                status=status,
-                timestamp=timestamp,
-                title=phase_title,
-                decision=decision_detail,
-                context=Context(
-                    problem=phase.phase_problem,
-                    alternatives=phase.alternatives[:5],
-                    trade_offs=phase.trade_offs[:5],
-                ),
-                why=Why(
-                    rationale_summary=phase.phase_rationale,
-                    certainty=certainty,
-                    missing_info=missing_info,
-                ),
-                evidence=evidence,
-                tags=phase.tags or extraction.tags or self._extract_tags(clean_text, detection),
-                quality=Quality(
-                    scribe_confidence=extraction.confidence if extraction.confidence is not None else detection.confidence,
-                    review_state=ReviewState.UNREVIEWED,
-                    review_notes=redaction_notes if redaction_notes else None,
-                ),
-                original_text=raw_event.text,
-                group_summary=getattr(extraction, 'group_summary', None) or None,
-                payload=Payload(format="markdown", text=""),
-                # Group fields (phase_chain or bundle)
-                group_id=group_id,
-                group_type=group_type,
-                phase_seq=seq,
-                phase_total=phase_total,
-            )
-            record.ensure_evidence_certainty_consistency()
-            record.payload.text = render_payload_text(record)
-
-            # Populate reusable_insight from pre_extraction group_summary
-            if getattr(extraction, 'group_summary', None):
-                record.reusable_insight = extraction.group_summary
-
-            records.append(record)
-
-        return records
-
-    def _redact_sensitive(self, text: str) -> tuple[str, Optional[str]]:
-        """Redact sensitive data from text"""
-        redacted = text
-        redactions = []
-
-        for pattern, replacement in self.SENSITIVE_PATTERNS:
-            matches = re.findall(pattern, redacted, re.IGNORECASE)
-            if matches:
-                redacted = re.sub(pattern, replacement, redacted, flags=re.IGNORECASE)
-                redactions.append(f"Redacted {len(matches)} {replacement}")
-
-        notes = "; ".join(redactions) if redactions else None
-        return redacted, notes
-
-    def _extract_title(self, text: str, detection: DetectionResult) -> str:
-        """Extract a short title from text"""
-        # Try to find a decision statement
-        title_patterns = [
-            r'(?:decided|chose|going with|adopting)\s+(.{5,50}?)(?:\.|,|because)',
-            r'decision[:\s]+(.{5,50}?)(?:\.|$)',
-        ]
-
-        for pattern in title_patterns:
-            match = re.search(pattern, text, re.IGNORECASE)
-            if match:
-                return match.group(1).strip()
-
-        # Fall back to first sentence or category
-        first_sentence = text.split('.')[0][:60]
-        if len(first_sentence) > 10:
-            return first_sentence.strip()
-
-        return f"{detection.category or 'General'} decision"
-
-    def _extract_decision_detail(self, raw_event: RawEvent, text: str) -> DecisionDetail:
-        """Extract decision details"""
-        # Parse timestamp
-        when = ""
-        if raw_event.timestamp:
-            try:
-                ts = float(raw_event.timestamp)
-                when = datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")
-            except (ValueError, TypeError):
-                when = raw_event.timestamp
-
-        return DecisionDetail(
-            what=text[:500],  # Limit length
-            who=[f"user:{raw_event.user}"] if raw_event.user else [],
-            where=f"{raw_event.source}:{raw_event.channel}" if raw_event.channel else raw_event.source,
-            when=when,
-        )
-
-    def _extract_context(self, text: str) -> Context:
-        """Extract context from text"""
-        # Try to find problem statement
-        problem = ""
-        problem_patterns = [
-            r'(?:problem|issue|challenge)[:\s]+(.{10,200}?)(?:\.|$)',
-            r'(?:because|since)\s+(.{10,200}?)(?:,|we)',
-        ]
-        for pattern in problem_patterns:
-            match = re.search(pattern, text, re.IGNORECASE)
-            if match:
-                problem = match.group(1).strip()
-                break
-
-        # Try to find alternatives
-        alternatives = []
-        alt_patterns = [
-            r'(?:alternatives?|options?|considered)[:\s]+(.{10,200}?)(?:\.|$)',
-            r'(?:instead of|over|rather than)\s+(\w+(?:\s+\w+){0,3})',
-        ]
-        for pattern in alt_patterns:
-            matches = re.findall(pattern, text, re.IGNORECASE)
-            alternatives.extend([m.strip() for m in matches if len(m.strip()) > 2])
-
-        # Try to find trade-offs
-        trade_offs = []
-        tradeoff_patterns = [
-            r'(?:trade-?off|downside|con)[:\s]+(.{10,100}?)(?:\.|$)',
-            r'(?:but|however)\s+(.{10,100}?)(?:\.|$)',
-        ]
-        for pattern in tradeoff_patterns:
-            matches = re.findall(pattern, text, re.IGNORECASE)
-            trade_offs.extend([m.strip() for m in matches if len(m.strip()) > 5])
-
-        return Context(
-            problem=problem,
-            alternatives=alternatives[:5],  # Limit
-            trade_offs=trade_offs[:5],
-        )
-
-    def _extract_evidence(self, raw_event: RawEvent, text: str) -> List[Evidence]:
-        """Extract evidence with quotes from text"""
-        evidence = []
-
-        # Find quotes in text
-        for pattern in self.QUOTE_PATTERNS:
-            matches = re.findall(pattern, text)
-            for quote in matches:
-                if len(quote) >= 10:
-                    evidence.append(Evidence(
-                        claim="Quoted statement from discussion",
-                        quote=quote[:200],  # Limit quote length
-                        source=SourceRef(
-                            type=self._parse_source_type(raw_event.source),
-                            url=raw_event.url,
-                            pointer=f"channel:{raw_event.channel}" if raw_event.channel else None,
-                        ),
-                    ))
-
-        # If no quotes found, create evidence from the text itself
-        # but mark it as needing verification
-        if not evidence and len(text) >= 20:
-            # Use the text as a paraphrase, not a quote
-            evidence.append(Evidence(
-                claim="Decision statement (paraphrased)",
-                quote=text[:150] + "..." if len(text) > 150 else text,
-                source=SourceRef(
-                    type=self._parse_source_type(raw_event.source),
-                    url=raw_event.url,
-                    pointer=f"channel:{raw_event.channel}" if raw_event.channel else None,
-                ),
-            ))
-
-        return evidence[:3]  # Limit to 3 pieces of evidence
-
-    def _extract_rationale(self, text: str) -> str:
-        """Extract rationale/reasoning from text"""
-        for pattern in self.RATIONALE_PATTERNS:
-            match = re.search(pattern, text, re.IGNORECASE)
-            if match:
-                return match.group(1).strip()
-
-        # If no explicit rationale, return empty
-        return ""
-
-    def _determine_certainty(
-        self,
-        evidence: List[Evidence],
-        rationale: str
-    ) -> tuple[Certainty, List[str]]:
-        """
-        Determine certainty level based on evidence.
-
-        Rules:
-        - No evidence → unknown
-        - Evidence without direct quotes → partially_supported
-        - Evidence with direct quotes → supported (if rationale also present)
-        """
-        missing_info = []
-
-        if not evidence:
-            missing_info.append("No evidence found")
-            return Certainty.UNKNOWN, missing_info
-
-        # Check if evidence has actual quotes (not paraphrases)
-        has_direct_quotes = any(
-            "paraphrase" not in e.claim.lower()
-            for e in evidence
-        )
-
-        if not has_direct_quotes:
-            missing_info.append("No direct quotes - evidence is paraphrased")
-            return Certainty.PARTIALLY_SUPPORTED, missing_info
-
-        if not rationale:
-            missing_info.append("Explicit rationale not found")
-            return Certainty.PARTIALLY_SUPPORTED, missing_info
-
-        return Certainty.SUPPORTED, missing_info
-
-    def _determine_status(self, evidence: List[Evidence], text: str) -> Status:
-        """
-        Determine decision status.
-
-        Rules:
-        - No evidence → proposed
-        - Explicit acceptance markers → accepted
-        - Default → proposed (conservative)
-        """
-        if not evidence:
-            return Status.PROPOSED
-
-        # Look for acceptance markers
-        acceptance_patterns = [
-            r'\b(?:approved|accepted|confirmed|finalized|agreed|decided)\b',
-            r'\b(?:final decision|it\'s decided|we\'re going with)\b',
-        ]
-
-        text_lower = text.lower()
-        for pattern in acceptance_patterns:
-            if re.search(pattern, text_lower):
-                return Status.ACCEPTED
-
-        # Default to proposed (conservative)
-        return Status.PROPOSED
-
-    def _status_from_hint(
-        self,
-        hint: str,
-        evidence: List[Evidence],
-        text: str,
-    ) -> Status:
-        """Determine status from LLM-provided hint with fallback to rules."""
-        hint_lower = hint.lower().strip()
-        if hint_lower == "accepted":
-            return Status.ACCEPTED
-        if hint_lower == "rejected":
-            return Status.PROPOSED  # Rejected proposals are still proposals, not superseded
-        if hint_lower == "proposed":
-            return Status.PROPOSED
-        # Fallback to regex-based detection
-        return self._determine_status(evidence, text)
-
-    def _parse_domain(self, domain_str: Optional[str]) -> Domain:
-        """Parse domain string to Domain enum"""
-        if not domain_str:
-            return Domain.GENERAL
-
-        domain_lower = domain_str.lower()
-
-        # Map string to enum
-        domain_map = {
-            "architecture": Domain.ARCHITECTURE,
-            "security": Domain.SECURITY,
-            "product": Domain.PRODUCT,
-            "exec": Domain.EXEC,
-            "ops": Domain.OPS,
-            "design": Domain.DESIGN,
-            "data": Domain.DATA,
-            "hr": Domain.HR,
-            "marketing": Domain.MARKETING,
-            "incident": Domain.INCIDENT,
-            "debugging": Domain.DEBUGGING,
-            "qa": Domain.QA,
-            "legal": Domain.LEGAL,
-            "finance": Domain.FINANCE,
-            "sales": Domain.SALES,
-            "customer_success": Domain.CUSTOMER_SUCCESS,
-            "customer_escalation": Domain.CUSTOMER_SUCCESS,
-            "research": Domain.RESEARCH,
-            "risk": Domain.RISK,
-        }
-
-        for key, value in domain_map.items():
-            if key in domain_lower:
-                return value
-
-        return Domain.GENERAL
-
-    def _parse_source_type(self, source: str) -> SourceType:
-        """Parse source string to SourceType enum"""
-        source_lower = source.lower()
-
-        if "slack" in source_lower:
-            return SourceType.SLACK
-        if "github" in source_lower:
-            return SourceType.GITHUB
-        if "notion" in source_lower:
-            return SourceType.NOTION
-        if "meeting" in source_lower:
-            return SourceType.MEETING
-        if "email" in source_lower:
-            return SourceType.EMAIL
-        if "doc" in source_lower:
-            return SourceType.DOC
-
-        return SourceType.OTHER
-
-    def _extract_tags(self, text: str, detection: DetectionResult) -> List[str]:
-        """Extract relevant tags"""
-        tags = []
-
-        # Add domain as tag
-        if detection.domain:
-            tags.append(detection.domain)
-
-        # Add category as tag
-        if detection.category and detection.category != detection.domain:
-            tags.append(detection.category.replace("_", "-"))
-
-        # Extract hashtags if present
-        hashtags = re.findall(r'#(\w+)', text)
-        tags.extend(hashtags[:5])
-
-        # Common keywords as tags
-        keywords = [
-            "microservices", "monolith", "database", "api", "security",
-            "performance", "scalability", "migration", "refactor",
-            "deprecation", "compliance", "gdpr", "sso", "auth",
-        ]
-        text_lower = text.lower()
-        for kw in keywords:
-            if kw in text_lower and kw not in tags:
-                tags.append(kw)
-
-        return list(set(tags))[:10]  # Unique, max 10
diff --git a/agents/scribe/review_queue.py b/agents/scribe/review_queue.py
deleted file mode 100644
index ea6fad6..0000000
--- a/agents/scribe/review_queue.py
+++ /dev/null
@@ -1,352 +0,0 @@
-"""
-Review Queue
-
-Manages human review for low-confidence captures.
-Implements Stage 3 of the Scribe pipeline.
-
-Review Questions (minimum 3):
-Q1. Is this worth saving? (Capture/Ignore)
-Q2. Is the "Why" supported by evidence? (Supported/Partial/Unknown)
-Q3. Is the sensitivity label correct? (public/internal/restricted)
-Q4. (Optional) Is this the final decision? (proposed/accepted/superseded/reverted)
-"""
-
-import json
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import List, Optional, Dict, Any
-from dataclasses import dataclass, field, asdict
-from enum import Enum
-
-from ..common.schemas import (
-    DecisionRecord,
-    Certainty,
-    Sensitivity,
-    Status,
-    ReviewState,
-)
-from ..common.schemas.templates import render_payload_text
-from ..common.config import REVIEW_QUEUE_PATH
-
-
-class ReviewAnswer(str, Enum):
-    """Possible answers for review questions"""
-    # Q1: Worth saving?
-    CAPTURE = "capture"
-    IGNORE = "ignore"
-
-    # Q2: Evidence supported?
-    SUPPORTED = "supported"
-    PARTIALLY_SUPPORTED = "partially_supported"
-    UNKNOWN = "unknown"
-
-    # Q3: Sensitivity
-    PUBLIC = "public"
-    INTERNAL = "internal"
-    RESTRICTED = "restricted"
-
-    # Q4: Status
-    PROPOSED = "proposed"
-    ACCEPTED = "accepted"
-    SUPERSEDED = "superseded"
-    REVERTED = "reverted"
-
-
-@dataclass
-class ReviewAnswers:
-    """Answers to review questions"""
-    q1_worth_saving: ReviewAnswer  # capture or ignore
-    q2_evidence_supported: ReviewAnswer  # supported, partially_supported, unknown
-    q3_sensitivity: ReviewAnswer  # public, internal, restricted
-    q4_status: Optional[ReviewAnswer] = None  # proposed, accepted, superseded, reverted
-    reviewer_notes: Optional[str] = None
-
-
-@dataclass
-class ReviewItem:
-    """Item in the review queue"""
-    record_id: str
-    record_json: Dict[str, Any]
-    detection_confidence: float
-    created_at: str
-    questions: List[str] = field(default_factory=list)
-    status: str = "pending"  # pending, reviewed, expired
-
-
-class ReviewQueue:
-    """
-    Manages the review queue for human review of captures.
-
-    The queue is persisted to ~/.rune/review_queue.json.
-
-    Workflow:
-    1. Scribe adds low-confidence captures to queue
-    2. Human reviews via UI or CLI
-    3. Approved items are stored to enVector
-    4. Ignored items are discarded
-    """
-
-    # Standard review questions
-    QUESTIONS = [
-        "Q1. Is this decision/learning/rejection worth saving to organizational memory?",
-        "Q2. Is the 'Why' (rationale) supported by the evidence (quotes)?",
-        "Q3. Is the sensitivity label (public/internal/restricted) correct?",
-        "Q4. (Optional) Is this the final decision status?",
-    ]
-
-    def __init__(self, queue_path: Optional[Path] = None):
-        """
-        Initialize review queue.
-
-        Args:
-            queue_path: Path to queue file (default: ~/.rune/review_queue.json)
-        """
-        self._queue_path = queue_path or REVIEW_QUEUE_PATH
-        self._queue: List[ReviewItem] = []
-        self._load_queue()
-
-    def _load_queue(self) -> None:
-        """Load queue from disk"""
-        if not self._queue_path.exists():
-            self._queue = []
-            return
-
-        try:
-            with open(self._queue_path) as f:
-                data = json.load(f)
-
-            self._queue = [
-                ReviewItem(
-                    record_id=item["record_id"],
-                    record_json=item["record_json"],
-                    detection_confidence=item["detection_confidence"],
-                    created_at=item["created_at"],
-                    questions=item.get("questions", self.QUESTIONS),
-                    status=item.get("status", "pending"),
-                )
-                for item in data
-            ]
-        except (json.JSONDecodeError, IOError, KeyError) as e:
-            print(f"[ReviewQueue] Warning: Failed to load queue: {e}")
-            self._queue = []
-
-    def _save_queue(self) -> None:
-        """Save queue to disk"""
-        self._queue_path.parent.mkdir(parents=True, exist_ok=True)
-
-        data = [
-            {
-                "record_id": item.record_id,
-                "record_json": item.record_json,
-                "detection_confidence": item.detection_confidence,
-                "created_at": item.created_at,
-                "questions": item.questions,
-                "status": item.status,
-            }
-            for item in self._queue
-        ]
-
-        with open(self._queue_path, "w") as f:
-            json.dump(data, f, indent=2, default=str)
-
-    def add(
-        self,
-        record: DecisionRecord,
-        detection_confidence: float
-    ) -> str:
-        """
-        Add a record to the review queue.
-
-        Args:
-            record: Decision record to review
-            detection_confidence: Confidence from detector
-
-        Returns:
-            Record ID
-        """
-        # Convert record to dict for JSON storage
-        record_dict = record.model_dump(mode='json')
-
-        item = ReviewItem(
-            record_id=record.id,
-            record_json=record_dict,
-            detection_confidence=detection_confidence,
-            created_at=datetime.now(timezone.utc).isoformat(),
-            questions=self.QUESTIONS.copy(),
-            status="pending",
-        )
-
-        self._queue.append(item)
-        self._save_queue()
-
-        print(f"[ReviewQueue] Added {record.id} for review (confidence: {detection_confidence:.2f})")
-        return record.id
-
-    def get_pending(self) -> List[ReviewItem]:
-        """Get all pending review items"""
-        return [item for item in self._queue if item.status == "pending"]
-
-    def get_item(self, record_id: str) -> Optional[ReviewItem]:
-        """Get a specific review item by ID"""
-        for item in self._queue:
-            if item.record_id == record_id:
-                return item
-        return None
-
-    def submit_review(
-        self,
-        record_id: str,
-        answers: ReviewAnswers,
-        reviewer: Optional[str] = None
-    ) -> Optional[DecisionRecord]:
-        """
-        Submit review for an item.
-
-        Args:
-            record_id: ID of the record being reviewed
-            answers: Review answers
-            reviewer: Reviewer identifier
-
-        Returns:
-            Updated DecisionRecord if approved, None if ignored
-
-        Side effects:
-            - Updates item status in queue
-            - Modifies record based on answers
-        """
-        item = self.get_item(record_id)
-        if not item:
-            print(f"[ReviewQueue] Item not found: {record_id}")
-            return None
-
-        # Check Q1: Worth saving?
-        if answers.q1_worth_saving == ReviewAnswer.IGNORE:
-            item.status = "rejected"
-            self._save_queue()
-            print(f"[ReviewQueue] Item {record_id} rejected by reviewer")
-            return None
-
-        # Reconstruct record from JSON
-        record = DecisionRecord.model_validate(item.record_json)
-
-        # Apply Q2: Update certainty
-        certainty_map = {
-            ReviewAnswer.SUPPORTED: Certainty.SUPPORTED,
-            ReviewAnswer.PARTIALLY_SUPPORTED: Certainty.PARTIALLY_SUPPORTED,
-            ReviewAnswer.UNKNOWN: Certainty.UNKNOWN,
-        }
-        if answers.q2_evidence_supported in certainty_map:
-            record.why.certainty = certainty_map[answers.q2_evidence_supported]
-
-        # Apply Q3: Update sensitivity
-        sensitivity_map = {
-            ReviewAnswer.PUBLIC: Sensitivity.PUBLIC,
-            ReviewAnswer.INTERNAL: Sensitivity.INTERNAL,
-            ReviewAnswer.RESTRICTED: Sensitivity.RESTRICTED,
-        }
-        if answers.q3_sensitivity in sensitivity_map:
-            record.sensitivity = sensitivity_map[answers.q3_sensitivity]
-
-        # Apply Q4: Update status (if provided)
-        if answers.q4_status:
-            status_map = {
-                ReviewAnswer.PROPOSED: Status.PROPOSED,
-                ReviewAnswer.ACCEPTED: Status.ACCEPTED,
-                ReviewAnswer.SUPERSEDED: Status.SUPERSEDED,
-                ReviewAnswer.REVERTED: Status.REVERTED,
-            }
-            if answers.q4_status in status_map:
-                record.status = status_map[answers.q4_status]
-
-        # Update quality metadata
-        record.quality.review_state = ReviewState.APPROVED
-        record.quality.reviewed_by = reviewer
-        if answers.reviewer_notes:
-            existing_notes = record.quality.review_notes or ""
-            record.quality.review_notes = f"{existing_notes}\nReviewer: {answers.reviewer_notes}".strip()
-
-        # Regenerate payload.text with updated values
-        record.payload.text = render_payload_text(record)
-
-        # Update queue status
-        item.status = "reviewed"
-        self._save_queue()
-
-        print(f"[ReviewQueue] Item {record_id} approved by {reviewer or 'unknown'}")
-        return record
-
-    def remove(self, record_id: str) -> bool:
-        """Remove an item from the queue"""
-        for i, item in enumerate(self._queue):
-            if item.record_id == record_id:
-                del self._queue[i]
-                self._save_queue()
-                return True
-        return False
-
-    def clear_reviewed(self) -> int:
-        """Clear all reviewed items from queue"""
-        original_len = len(self._queue)
-        self._queue = [item for item in self._queue if item.status == "pending"]
-        self._save_queue()
-        return original_len - len(self._queue)
-
-    def get_stats(self) -> Dict[str, int]:
-        """Get queue statistics"""
-        stats = {
-            "total": len(self._queue),
-            "pending": 0,
-            "reviewed": 0,
-            "rejected": 0,
-        }
-        for item in self._queue:
-            if item.status in stats:
-                stats[item.status] += 1
-        return stats
-
-    def format_for_review(self, item: ReviewItem) -> str:
-        """Format a review item for display"""
-        record_dict = item.record_json
-
-        lines = [
-            "=" * 60,
-            f"REVIEW ITEM: {item.record_id}",
-            f"Detection Confidence: {item.detection_confidence:.2f}",
-            f"Created: {item.created_at}",
-            "=" * 60,
-            "",
-            f"Title: {record_dict.get('title', 'N/A')}",
-            f"Domain: {record_dict.get('domain', 'N/A')}",
-            f"Current Sensitivity: {record_dict.get('sensitivity', 'N/A')}",
-            f"Current Status: {record_dict.get('status', 'N/A')}",
-            "",
-            "Decision:",
-            f"  {record_dict.get('decision', {}).get('what', 'N/A')[:200]}",
-            "",
-            "Why (Rationale):",
-            f"  {record_dict.get('why', {}).get('rationale_summary', 'N/A')[:200]}",
-            f"  Certainty: {record_dict.get('why', {}).get('certainty', 'N/A')}",
-            "",
-            "Evidence:",
-        ]
-
-        evidence = record_dict.get('evidence', [])
-        if evidence:
-            for i, e in enumerate(evidence[:3], 1):
-                lines.append(f"  {i}. Claim: {e.get('claim', 'N/A')[:100]}")
-                lines.append(f"     Quote: \"{e.get('quote', 'N/A')[:100]}\"")
-        else:
-            lines.append("  (none)")
-
-        lines.extend([
-            "",
-            "-" * 60,
-            "REVIEW QUESTIONS:",
-        ])
-
-        for i, q in enumerate(item.questions, 1):
-            lines.append(f"  {q}")
-
-        lines.append("=" * 60)
-
-        return "\n".join(lines)
diff --git a/agents/scribe/server.py b/agents/scribe/server.py
deleted file mode 100644
index 93633c5..0000000
--- a/agents/scribe/server.py
+++ /dev/null
@@ -1,576 +0,0 @@
-"""
-Scribe Server
-
-FastAPI server for receiving webhooks and capturing organizational context.
-
-Endpoints:
-- POST /slack/events: Slack webhook endpoint
-- POST /notion/events: Notion webhook endpoint
-- GET /health: Health check
-- GET /review: Get pending reviews
-- POST /review/{record_id}: Submit review
-
-Pipeline:
-1. Receive webhook event
-2. Parse with appropriate handler
-3. Detect significance with pattern matching
-4. Build Decision Record
-5. Auto-capture or add to review queue
-6. Store to enVector
-"""
-
-import json
-import logging
-import os
-from datetime import datetime, timezone
-from typing import Optional
-from contextlib import asynccontextmanager
-
-logger = logging.getLogger("rune.scribe")
-
-from fastapi import FastAPI, Request, HTTPException, Header, BackgroundTasks
-from fastapi.responses import JSONResponse
-from pydantic import BaseModel
-
-from ..common.config import load_config, RuneConfig, ensure_directories
-from ..common.embedding_service import EmbeddingService, get_embedding_service
-from ..common.envector_client import EnVectorClient
-from ..common.pattern_cache import PatternCache
-from ..common.language import detect_language
-from .pattern_parser import load_default_patterns, load_all_language_patterns
-from .detector import DecisionDetector
-from .record_builder import RecordBuilder, RawEvent
-from .llm_extractor import LLMExtractor
-from .review_queue import ReviewQueue, ReviewAnswers, ReviewAnswer
-from .tier2_filter import Tier2Filter
-from .handlers import SlackHandler, NotionHandler, Message
-
-
-# Global state
-config: Optional[RuneConfig] = None
-index_name: Optional[str] = None
-detector: Optional[DecisionDetector] = None
-tier2_filter: Optional[Tier2Filter] = None
-record_builder: Optional[RecordBuilder] = None
-envector_client: Optional[EnVectorClient] = None
-review_queue: Optional[ReviewQueue] = None
-slack_handler: Optional[SlackHandler] = None
-notion_handler: Optional[NotionHandler] = None
-embedding_service: Optional[EmbeddingService] = None
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Initialize components on startup"""
-    global config, index_name, detector, tier2_filter, record_builder, envector_client, review_queue
-    global slack_handler, notion_handler, embedding_service
-
-    logger.info("Starting up...")
-
-    # Ensure directories exist
-    ensure_directories()
-
-    # Load config
-    config = load_config()
-    logger.info("Loaded config (state: %s)", config.state)
-
-    # Index name must be provided via environment variable (set by Vault or admin)
-    index_name = os.getenv("ENVECTOR_INDEX_NAME")
-    if not index_name:
-        logger.warning("ENVECTOR_INDEX_NAME not set — record storage will be unavailable")
-
-    # Initialize embedding service
-    logger.info("Initializing embedding service...")
-    embedding_service = get_embedding_service(
-        mode=config.embedding.mode,
-        model=config.embedding.model
-    )
-
-    # Load and embed patterns (including multilingual)
-    logger.info("Loading patterns...")
-    patterns = load_all_language_patterns()
-    logger.info("Found %d patterns", len(patterns))
-
-    pattern_cache = PatternCache(embedding_service)
-    pattern_cache.load_patterns(patterns)
-
-    # Initialize detector
-    detector = DecisionDetector(
-        pattern_cache=pattern_cache,
-        threshold=config.scribe.similarity_threshold,
-        high_confidence_threshold=config.scribe.auto_capture_threshold
-    )
-    logger.info("Detector ready (threshold: %s)", config.scribe.similarity_threshold)
-
-    # LLM configuration
-    llm_cfg = config.llm
-    llm_provider = (llm_cfg.provider or os.getenv("RUNE_LLM_PROVIDER", "anthropic")).lower()
-    tier2_provider = (llm_cfg.tier2_provider or os.getenv("RUNE_TIER2_LLM_PROVIDER", llm_provider)).lower()
-    anthropic_key = llm_cfg.anthropic_api_key or os.getenv("ANTHROPIC_API_KEY") or None
-    openai_key = llm_cfg.openai_api_key or os.getenv("OPENAI_API_KEY") or None
-    google_key = llm_cfg.google_api_key or os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY") or None
-
-    def _provider_key(provider: str):
-        if provider == "openai":
-            return openai_key
-        if provider == "google":
-            return google_key
-        return anthropic_key
-
-    def _provider_model(provider: str, role: str) -> str:
-        if provider == "openai":
-            if role == "tier2" and llm_cfg.openai_tier2_model:
-                return llm_cfg.openai_tier2_model
-            return llm_cfg.openai_model
-        if provider == "google":
-            if role == "tier2" and llm_cfg.google_tier2_model:
-                return llm_cfg.google_tier2_model
-            return llm_cfg.google_model
-        if role == "tier2":
-            return config.scribe.tier2_model
-        return llm_cfg.anthropic_model
-
-    # Initialize Tier 2 LLM filter
-    if config.scribe.tier2_enabled and _provider_key(tier2_provider):
-        tier2_filter = Tier2Filter(
-            llm_provider=tier2_provider,
-            anthropic_api_key=anthropic_key,
-            openai_api_key=openai_key,
-            google_api_key=google_key,
-            model=_provider_model(tier2_provider, "tier2"),
-        )
-        if tier2_filter.is_available:
-            logger.info("Tier 2 LLM filter ready (%s/%s)", tier2_provider, tier2_filter._model)
-        else:
-            logger.warning("Tier 2 LLM filter init failed (Tier 1 only)")
-    else:
-        tier2_filter = None
-        logger.info("Tier 2 LLM filter disabled" if not config.scribe.tier2_enabled else "Tier 2 skipped (no API key)")
-
-    # Initialize Tier 3 LLM extractor (for record building)
-    llm_extractor = LLMExtractor(
-        llm_provider=llm_provider,
-        anthropic_api_key=anthropic_key,
-        openai_api_key=openai_key,
-        google_api_key=google_key,
-        model=_provider_model(llm_provider, "extract"),
-    )
-    if llm_extractor.is_available:
-        logger.info("Tier 3 LLM extractor ready (Sonnet)")
-    else:
-        logger.info("Tier 3 LLM extractor not available (regex fallback)")
-
-    # Initialize record builder
-    record_builder = RecordBuilder(llm_extractor=llm_extractor)
-
-    # Initialize review queue
-    review_queue = ReviewQueue()
-    stats = review_queue.get_stats()
-    logger.info("Review queue: %d pending", stats['pending'])
-
-    # Initialize handlers
-    slack_handler = SlackHandler(signing_secret=config.scribe.slack_signing_secret)
-    notion_handler = NotionHandler(signing_secret=config.scribe.notion_signing_secret)
-
-    # Initialize enVector client (if configured)
-    if config.envector.endpoint:
-        try:
-            envector_client = EnVectorClient(
-                address=config.envector.endpoint,
-                access_token=config.envector.api_key or None,
-            )
-            logger.info("EnVector client ready (%s)", config.envector.endpoint)
-        except Exception as e:
-            logger.warning("EnVector client failed: %s", e)
-            envector_client = None
-
-    logger.info("Ready to receive events")
-
-    yield
-
-    # Cleanup
-    logger.info("Shutting down...")
-
-
-app = FastAPI(
-    title="Rune Scribe Agent",
-    description="Organizational context capture via webhooks",
-    version="0.1.0",
-    lifespan=lifespan
-)
-
-
-# =============================================================================
-# Request/Response Models
-# =============================================================================
-
-class ReviewSubmission(BaseModel):
-    """Review submission request"""
-    q1_worth_saving: str  # "capture" or "ignore"
-    q2_evidence_supported: str  # "supported", "partially_supported", "unknown"
-    q3_sensitivity: str  # "public", "internal", "restricted"
-    q4_status: Optional[str] = None  # "proposed", "accepted", etc.
-    reviewer_notes: Optional[str] = None
-    reviewer: Optional[str] = None
-
-
-# =============================================================================
-# Background Tasks
-# =============================================================================
-
-async def process_message(message: Message):
-    """
-    Process a message through the 3-tier capture pipeline.
-
-    Tier 1: Embedding similarity (local, zero tokens) — wide net
-    Tier 2: LLM policy filter (Haiku, ~200 tokens) — false positive removal
-    Tier 3: LLM extraction (Sonnet, ~500 tokens) — Decision Record building
-    """
-    global detector, tier2_filter, record_builder, envector_client, review_queue, embedding_service
-
-    if not detector or not record_builder:
-        logger.warning("Not initialized, skipping message")
-        return
-
-    # === Tier 1: Embedding similarity (local, free) ===
-    result = detector.detect(message.text)
-
-    if not result.is_significant:
-        return  # Not significant, ignore
-
-    logger.info("Tier 1 PASS (score: %.2f, pattern: \"%.50s...\")", result.confidence, result.matched_pattern)
-
-    # === Tier 2: LLM policy filter (Haiku, cheap) ===
-    if tier2_filter and tier2_filter.is_available:
-        filter_result = tier2_filter.evaluate(
-            text=message.text,
-            tier1_score=result.confidence,
-            tier1_pattern=result.matched_pattern or "",
-        )
-
-        if not filter_result.should_capture:
-            logger.info("Tier 2 REJECT: %s", filter_result.reason)
-            return
-
-        logger.info("Tier 2 PASS: %s", filter_result.reason)
-
-        # Use Tier 2's domain hint if Tier 1's is generic
-        if filter_result.domain != "general" and result.domain in (None, "general"):
-            result.domain = filter_result.domain
-    else:
-        logger.info("Tier 2 skipped (filter unavailable)")
-
-    # === Tier 3: LLM extraction + Decision Record building (Sonnet) ===
-    raw_event = RawEvent(
-        text=message.text,
-        user=message.user,
-        channel=message.channel,
-        timestamp=message.timestamp,
-        source=message.source,
-        thread_ts=message.thread_ts,
-        url=message.url,
-    )
-
-    language = detect_language(message.text)
-    record = record_builder.build(raw_event, result, language=language)
-
-    logger.info("Tier 3 built record: %s (certainty: %s)", record.id, record.why.certainty.value)
-
-    # Decide: auto-capture or review queue
-    if detector.should_auto_capture(result):
-        await store_record(record)
-    else:
-        review_queue.add(record, result.confidence)
-        logger.info("Added to review queue: %s", record.id)
-
-
-async def store_record(record):
-    """Store a Decision Record to enVector"""
-    global envector_client, embedding_service, index_name
-
-    if not envector_client or not embedding_service:
-        logger.warning("Cannot store (no enVector client): %s", record.id)
-        return
-
-    if not index_name:
-        logger.warning("Cannot store (no index name configured): %s", record.id)
-        return
-
-    try:
-        # The payload.text is what we embed
-        text = record.payload.text
-        metadata = record.model_dump(mode='json')
-
-        # Insert to enVector
-        result = envector_client.insert_with_text(
-            index_name=index_name,
-            texts=[text],
-            embedding_service=embedding_service,
-            metadata=[metadata]
-        )
-
-        if result.get("ok"):
-            logger.info("Stored: %s", record.id)
-        else:
-            logger.error("Failed to store %s: %s", record.id, result.get('error'))
-
-    except Exception as e:
-        logger.error("Error storing %s: %s", record.id, e)
-
-
-# =============================================================================
-# Endpoints
-# =============================================================================
-
-@app.get("/health")
-async def health():
-    """Health check endpoint"""
-    return {
-        "status": "healthy",
-        "service": "scribe",
-        "initialized": detector is not None,
-        "pipeline": "3-tier" if (tier2_filter and tier2_filter.is_available) else "1-tier",
-        "tier1_patterns": detector._cache.pattern_count if detector else 0,
-        "tier2_available": tier2_filter.is_available if tier2_filter else False,
-        "tier3_available": record_builder._llm_extractor.is_available if record_builder and record_builder._llm_extractor else False,
-        "pending_reviews": review_queue.get_stats()["pending"] if review_queue else 0,
-    }
-
-
-@app.post("/slack/events")
-async def slack_events(
-    request: Request,
-    background_tasks: BackgroundTasks,
-    x_slack_signature: Optional[str] = Header(None),
-    x_slack_request_timestamp: Optional[str] = Header(None)
-):
-    """
-    Handle Slack webhook events.
-
-    This is the main entry point for Slack integration.
-    """
-    global slack_handler
-
-    if not slack_handler:
-        raise HTTPException(status_code=503, detail="Handler not initialized")
-
-    # Read body
-    body = await request.body()
-
-    # Verify signature
-    if not slack_handler.verify_signature(
-        body,
-        x_slack_signature or "",
-        x_slack_request_timestamp or ""
-    ):
-        raise HTTPException(status_code=401, detail="Invalid signature")
-
-    # Parse JSON
-    try:
-        data = json.loads(body)
-    except json.JSONDecodeError:
-        raise HTTPException(status_code=400, detail="Invalid JSON")
-
-    # Handle URL verification challenge
-    if slack_handler.is_url_verification(data):
-        challenge = slack_handler.get_challenge(data)
-        return JSONResponse({"challenge": challenge})
-
-    # Parse event to message
-    message = await slack_handler.parse_event(data)
-
-    if message and slack_handler.should_process(message):
-        # Process in background (don't block response)
-        background_tasks.add_task(process_message, message)
-
-    # Acknowledge receipt
-    return JSONResponse({"ok": True})
-
-
-@app.post("/notion/events")
-async def notion_events(
-    request: Request,
-    background_tasks: BackgroundTasks,
-    x_notion_signature: Optional[str] = Header(None),
-):
-    """
-    Handle Notion webhook events.
-
-    Receives page.created, page.updated, and database.updated events.
-    """
-    global notion_handler
-
-    if not notion_handler:
-        raise HTTPException(status_code=503, detail="Handler not initialized")
-
-    # Read body
-    body = await request.body()
-
-    # Verify signature
-    if not notion_handler.verify_signature(body, x_notion_signature or "", ""):
-        raise HTTPException(status_code=401, detail="Invalid signature")
-
-    # Parse JSON
-    try:
-        data = json.loads(body)
-    except json.JSONDecodeError:
-        raise HTTPException(status_code=400, detail="Invalid JSON")
-
-    # Parse event to message
-    message = await notion_handler.parse_event(data)
-
-    if message and notion_handler.should_process(message):
-        background_tasks.add_task(process_message, message)
-
-    # Acknowledge receipt
-    return JSONResponse({"ok": True})
-
-
-@app.get("/review")
-async def get_reviews():
-    """Get pending reviews"""
-    global review_queue
-
-    if not review_queue:
-        raise HTTPException(status_code=503, detail="Review queue not initialized")
-
-    pending = review_queue.get_pending()
-
-    return {
-        "pending_count": len(pending),
-        "items": [
-            {
-                "record_id": item.record_id,
-                "confidence": item.detection_confidence,
-                "created_at": item.created_at,
-                "title": item.record_json.get("title", "N/A"),
-                "domain": item.record_json.get("domain", "N/A"),
-            }
-            for item in pending
-        ]
-    }
-
-
-@app.get("/review/{record_id}")
-async def get_review_item(record_id: str):
-    """Get a specific review item"""
-    global review_queue
-
-    if not review_queue:
-        raise HTTPException(status_code=503, detail="Review queue not initialized")
-
-    item = review_queue.get_item(record_id)
-    if not item:
-        raise HTTPException(status_code=404, detail="Item not found")
-
-    return {
-        "record_id": item.record_id,
-        "confidence": item.detection_confidence,
-        "created_at": item.created_at,
-        "status": item.status,
-        "questions": item.questions,
-        "record": item.record_json,
-        "formatted": review_queue.format_for_review(item),
-    }
-
-
-@app.post("/review/{record_id}")
-async def submit_review(record_id: str, submission: ReviewSubmission):
-    """Submit review for an item"""
-    global review_queue
-
-    if not review_queue:
-        raise HTTPException(status_code=503, detail="Review queue not initialized")
-
-    # Convert submission to ReviewAnswers
-    answers = ReviewAnswers(
-        q1_worth_saving=ReviewAnswer(submission.q1_worth_saving),
-        q2_evidence_supported=ReviewAnswer(submission.q2_evidence_supported),
-        q3_sensitivity=ReviewAnswer(submission.q3_sensitivity),
-        q4_status=ReviewAnswer(submission.q4_status) if submission.q4_status else None,
-        reviewer_notes=submission.reviewer_notes,
-    )
-
-    # Submit review
-    record = review_queue.submit_review(
-        record_id=record_id,
-        answers=answers,
-        reviewer=submission.reviewer
-    )
-
-    if record is None:
-        # Item was rejected
-        return {"status": "rejected", "record_id": record_id}
-
-    # Store approved record
-    await store_record(record)
-
-    return {
-        "status": "approved",
-        "record_id": record_id,
-        "stored": True,
-    }
-
-
-@app.delete("/review/{record_id}")
-async def delete_review(record_id: str):
-    """Delete a review item"""
-    global review_queue
-
-    if not review_queue:
-        raise HTTPException(status_code=503, detail="Review queue not initialized")
-
-    removed = review_queue.remove(record_id)
-    if not removed:
-        raise HTTPException(status_code=404, detail="Item not found")
-
-    return {"status": "deleted", "record_id": record_id}
-
-
-@app.get("/stats")
-async def get_stats():
-    """Get Scribe statistics"""
-    global review_queue, detector
-
-    stats = {
-        "service": "scribe",
-        "timestamp": datetime.now(timezone.utc).isoformat(),
-    }
-
-    if review_queue:
-        stats["review_queue"] = review_queue.get_stats()
-
-    if detector:
-        stats["pipeline"] = {
-            "tier1_threshold": detector.threshold,
-            "tier1_patterns": detector._cache.pattern_count,
-            "tier2_enabled": tier2_filter.is_available if tier2_filter else False,
-            "tier3_enabled": record_builder._llm_extractor.is_available if record_builder and record_builder._llm_extractor else False,
-            "auto_capture_threshold": detector.high_confidence_threshold,
-        }
-
-    return stats
-
-
-# =============================================================================
-# CLI Entry Point
-# =============================================================================
-
-def run_server():
-    """Run the Scribe server"""
-    import uvicorn
-
-    config = load_config()
-    port = config.scribe.slack_webhook_port
-
-    logger.info("Starting server on port %d", port)
-    uvicorn.run(
-        "agents.scribe.server:app",
-        host="0.0.0.0",
-        port=port,
-        reload=False,
-    )
-
-
-if __name__ == "__main__":
-    run_server()
diff --git a/agents/scribe/tier2_filter.py b/agents/scribe/tier2_filter.py
deleted file mode 100644
index ed107cf..0000000
--- a/agents/scribe/tier2_filter.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""
-Tier 2 LLM Filter — Policy-based capture judgment.
-
-After Tier 1 (embedding similarity) finds candidates, Tier 2 uses a small LLM
-(Haiku) to judge whether the message is truly worth capturing as organizational
-memory, based on natural language policy.
-
-Token budget: ~200 tokens per call (policy summary + message + response).
-"""
-
-import logging
-from dataclasses import dataclass
-from typing import Optional
-
-from ..common.llm_client import LLMClient
-from ..common.llm_utils import parse_llm_json
-
-logger = logging.getLogger("rune.scribe.tier2")
-
-
-FILTER_POLICY = """You judge whether a workplace message contains significant organizational knowledge that should be permanently recorded.
-
-CAPTURE if the message contains:
-- A concrete decision with reasoning (technology choice, architecture, process change)
-- A policy or standard being established or changed
-- A trade-off analysis or rejection of an alternative
-- A lesson learned from an incident, failure, or debugging session
-- A commitment or agreement that affects the team
-- Incident postmortem findings, root cause analysis, or corrective actions
-- Debugging breakthroughs: root cause identified, fix applied, workaround found
-- Bug triage outcomes: severity, ownership, or fix strategy decided
-- QA findings that change test strategy or acceptance criteria
-- Legal/compliance decisions or regulatory interpretations
-- Budget allocations, pricing changes, or cost optimization decisions
-- Sales intelligence: deal outcomes, competitive insights, customer requirements
-- Customer escalation resolutions or churn analysis insights
-- Research findings, experiment results, or proof-of-concept conclusions
-- Risk assessments with mitigation strategies
-
-DO NOT CAPTURE:
-- Casual conversation, greetings, or social chat
-- Questions without answers or decisions
-- Status updates without decisions or insights ("still working on X")
-- Vague opinions without commitment ("maybe we should...")
-- Routine alerts/deployments with no decision or learning attached
-
-Respond with JSON only: {"capture": true/false, "reason": "one sentence", "domain": "architecture|security|product|exec|ops|design|data|hr|marketing|incident|debugging|qa|legal|finance|sales|customer_success|research|risk|general"}"""
-
-
-@dataclass
-class FilterResult:
-    """Result of Tier 2 LLM filter."""
-    should_capture: bool
-    reason: str
-    domain: str = "general"
-    raw_response: Optional[str] = None
-
-
-class Tier2Filter:
-    """
-    LLM-based policy filter for capture decisions.
-
-    Uses a small, fast model (Haiku) to evaluate whether a Tier 1 candidate
-    is truly worth capturing as organizational memory.
-    """
-
-    def __init__(
-        self,
-        llm_provider: str = "anthropic",
-        anthropic_api_key: Optional[str] = None,
-        openai_api_key: Optional[str] = None,
-        google_api_key: Optional[str] = None,
-        model: str = "claude-haiku-4-5-20251001",
-    ):
-        self._provider = llm_provider
-        self._model = model
-        self._llm = LLMClient(
-            provider=llm_provider,
-            model=model,
-            anthropic_api_key=anthropic_api_key,
-            openai_api_key=openai_api_key,
-            google_api_key=google_api_key,
-        )
-
-    @property
-    def is_available(self) -> bool:
-        return self._llm.is_available
-
-    def evaluate(self, text: str, tier1_score: float = 0.0, tier1_pattern: str = "") -> FilterResult:
-        """
-        Evaluate whether a message should be captured.
-
-        Args:
-            text: The candidate message text
-            tier1_score: Similarity score from Tier 1 (for context)
-            tier1_pattern: Matched pattern from Tier 1 (for context)
-
-        Returns:
-            FilterResult with capture decision and reasoning
-        """
-        if not self.is_available:
-            # Fallback: skip capture to avoid noise (recoverable via /rune:capture)
-            return FilterResult(
-                should_capture=False,
-                reason="LLM filter unavailable, skipping to avoid noise",
-            )
-
-        try:
-            user_msg = f"<message>\n{text[:500]}\n</message>"
-            if tier1_pattern:
-                user_msg += f"\n(Tier 1 matched pattern: \"{tier1_pattern[:80]}\")"
-
-            raw = self._llm.generate(
-                user_msg,
-                system=FILTER_POLICY,
-                max_tokens=100,
-                timeout=30.0,
-            )
-            return self._parse_response(raw)
-
-        except Exception as e:
-            logger.warning("Evaluation failed: %s", e)
-            # Fallback: skip capture to avoid noise (recoverable via /rune:capture)
-            return FilterResult(
-                should_capture=False,
-                reason=f"LLM filter error ({e}), skipping to avoid noise",
-            )
-
-    def _parse_response(self, raw: str) -> FilterResult:
-        """Parse LLM JSON response."""
-        data = parse_llm_json(raw)
-        if not data:
-            return FilterResult(
-                should_capture=False,
-                reason="Failed to parse LLM response, skipping to avoid noise",
-                raw_response=raw,
-            )
-        return FilterResult(
-            should_capture=bool(data.get("capture", False)),
-            reason=str(data.get("reason", "")),
-            domain=str(data.get("domain", "general")).lower(),
-            raw_response=raw,
-        )
diff --git a/agents/tests/__init__.py b/agents/tests/__init__.py
deleted file mode 100644
index 6d74038..0000000
--- a/agents/tests/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""
-Tests for Rune Agents
-
-Unit tests for Scribe and Retriever agents.
-"""
diff --git a/agents/tests/test_agent_delegated.py b/agents/tests/test_agent_delegated.py
deleted file mode 100644
index 8d107ad..0000000
--- a/agents/tests/test_agent_delegated.py
+++ /dev/null
@@ -1,605 +0,0 @@
-"""
-Tests for Agent-Delegated Mode
-
-Tests the pre_extraction path in RecordBuilder and the extracted parameter
-in tool_capture (JSON → ExtractionResult conversion).
-"""
-
-import json
-import pytest
-from unittest.mock import Mock
-
-
-class TestRecordBuilderPreExtraction:
-    """Tests for RecordBuilder.build_phases with pre_extraction parameter"""
-
-    @pytest.fixture
-    def builder(self):
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.common.schemas import Sensitivity
-
-        return RecordBuilder(default_sensitivity=Sensitivity.INTERNAL)
-
-    @pytest.fixture
-    def sample_raw_event(self):
-        from agents.scribe.record_builder import RawEvent
-
-        return RawEvent(
-            text='We decided to use PostgreSQL over MySQL because of better JSON support.',
-            user="alice",
-            channel="architecture",
-            timestamp="1706799600.123456",
-            source="claude_agent",
-        )
-
-    @pytest.fixture
-    def sample_detection(self):
-        from agents.scribe.detector import DetectionResult
-
-        return DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="We decided to use",
-            category="architecture",
-            domain="architecture",
-            priority="high",
-        )
-
-    def test_single_pre_extraction(self, builder, sample_raw_event, sample_detection):
-        """Test single record from pre_extraction"""
-        from agents.scribe.llm_extractor import ExtractionResult, ExtractedFields
-
-        pre = ExtractionResult(
-            group_title="Adopt PostgreSQL",
-            status_hint="accepted",
-            tags=["database", "postgresql"],
-            confidence=0.90,
-            single=ExtractedFields(
-                title="Adopt PostgreSQL",
-                rationale="Better JSON support and team familiarity",
-                problem="Need reliable database with JSON support",
-                alternatives=["MySQL", "MongoDB"],
-                trade_offs=["Higher memory usage"],
-                status_hint="accepted",
-                tags=["database", "postgresql"],
-            ),
-        )
-
-        records = builder.build_phases(sample_raw_event, sample_detection, pre_extraction=pre)
-
-        assert len(records) == 1
-        record = records[0]
-        assert "PostgreSQL" in record.title
-        assert record.quality.scribe_confidence == 0.90  # From pre_extraction
-        assert record.payload.text != ""
-        assert record.domain.value == "architecture"
-
-    def test_multi_phase_pre_extraction(self, builder, sample_raw_event, sample_detection):
-        """Test multi-phase records from pre_extraction"""
-        from agents.scribe.llm_extractor import ExtractionResult, PhaseExtractedFields
-
-        pre = ExtractionResult(
-            group_title="Database Strategy",
-            group_type="phase_chain",
-            status_hint="accepted",
-            tags=["database"],
-            confidence=0.85,
-            phases=[
-                PhaseExtractedFields(
-                    phase_title="Requirements Analysis",
-                    phase_decision="Need ACID guarantees",
-                    phase_rationale="Production workload requires consistency",
-                    phase_problem="Current NoSQL limitations",
-                ),
-                PhaseExtractedFields(
-                    phase_title="Technology Selection",
-                    phase_decision="Adopt PostgreSQL",
-                    phase_rationale="Best JSON support among RDBMS",
-                    phase_problem="Need SQL + JSON support",
-                    alternatives=["MySQL", "CockroachDB"],
-                    trade_offs=["Higher memory usage"],
-                    tags=["postgresql"],
-                ),
-            ],
-        )
-
-        records = builder.build_phases(sample_raw_event, sample_detection, pre_extraction=pre)
-
-        assert len(records) == 2
-        # All records share group_id
-        assert records[0].group_id == records[1].group_id
-        assert records[0].group_id is not None
-        # Phase ordering
-        assert records[0].phase_seq == 0
-        assert records[1].phase_seq == 1
-        assert records[0].phase_total == 2
-        # Confidence from pre_extraction
-        assert records[0].quality.scribe_confidence == 0.85
-        assert records[1].quality.scribe_confidence == 0.85
-
-    def test_bundle_pre_extraction(self, builder, sample_raw_event, sample_detection):
-        """Test bundle records from pre_extraction"""
-        from agents.scribe.llm_extractor import ExtractionResult, PhaseExtractedFields
-
-        pre = ExtractionResult(
-            group_title="Auth Strategy",
-            group_type="bundle",
-            status_hint="accepted",
-            tags=["auth", "security"],
-            confidence=0.92,
-            phases=[
-                PhaseExtractedFields(
-                    phase_title="Core Decision",
-                    phase_decision="Use JWT with refresh tokens",
-                    phase_rationale="Stateless, scales with microservices",
-                    phase_problem="Need auth for distributed system",
-                ),
-                PhaseExtractedFields(
-                    phase_title="Alternatives Analysis",
-                    phase_decision="Compared session-based, OAuth2, JWT",
-                    phase_rationale="Sessions don't scale",
-                    phase_problem="",
-                    alternatives=["Session cookies", "OAuth2 server"],
-                ),
-            ],
-        )
-
-        records = builder.build_phases(sample_raw_event, sample_detection, pre_extraction=pre)
-
-        assert len(records) == 2
-        assert records[0].group_type == "bundle"
-        assert records[1].group_type == "bundle"
-        assert records[0].group_id == records[1].group_id
-
-    def test_pre_extraction_without_confidence_uses_detection(self, builder, sample_raw_event, sample_detection):
-        """Test that missing confidence falls back to detection.confidence"""
-        from agents.scribe.llm_extractor import ExtractionResult, ExtractedFields
-
-        pre = ExtractionResult(
-            single=ExtractedFields(
-                title="Test Decision",
-                rationale="Test reason",
-                status_hint="proposed",
-            ),
-        )
-
-        records = builder.build_phases(sample_raw_event, sample_detection, pre_extraction=pre)
-
-        assert len(records) == 1
-        # Should use detection.confidence (0.85), not None
-        assert records[0].quality.scribe_confidence == 0.85
-
-    def test_no_pre_extraction_no_llm_falls_back(self, builder, sample_raw_event, sample_detection):
-        """Test backward compat: no pre_extraction, no LLM → single record via build()"""
-        records = builder.build_phases(sample_raw_event, sample_detection)
-
-        assert len(records) == 1
-        # Should still produce a valid record
-        assert records[0].payload.text != ""
-
-
-class TestExtractedJSONParsing:
-    """Tests for parsing extracted JSON in the capture pipeline"""
-
-    def _parse_extracted(self, json_str: str):
-        """Helper: simulate the JSON→ExtractionResult conversion from server.py"""
-        from agents.common.llm_utils import parse_llm_json
-        from agents.scribe.llm_extractor import (
-            ExtractionResult, ExtractedFields, PhaseExtractedFields,
-        )
-
-        data = parse_llm_json(json_str)
-        if not data:
-            return None, "Invalid JSON"
-
-        # Tier 2 check
-        tier2 = data.get("tier2", {})
-        if not tier2.get("capture", True):
-            return None, f"Rejected: {tier2.get('reason', 'no reason')}"
-
-        agent_confidence = data.get("confidence")
-        if isinstance(agent_confidence, (int, float)):
-            agent_confidence = max(0.0, min(1.0, float(agent_confidence)))
-        else:
-            agent_confidence = None
-
-        phases_data = data.get("phases")
-        if phases_data and len(phases_data) > 1:
-            phases = []
-            for p in phases_data[:7]:
-                phases.append(PhaseExtractedFields(
-                    phase_title=str(p.get("phase_title", ""))[:60],
-                    phase_decision=str(p.get("phase_decision", "")),
-                    phase_rationale=str(p.get("phase_rationale", "")),
-                    phase_problem=str(p.get("phase_problem", "")),
-                    alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                    tags=[str(t).lower() for t in p.get("tags", []) if t],
-                ))
-            result = ExtractionResult(
-                group_title=str(data.get("group_title", ""))[:60],
-                group_type=str(data.get("group_type", "phase_chain")),
-                status_hint=str(data.get("status_hint", "")).lower(),
-                tags=[str(t).lower() for t in data.get("tags", []) if t],
-                confidence=agent_confidence,
-                phases=phases,
-            )
-        else:
-            if phases_data and len(phases_data) == 1:
-                p = phases_data[0]
-                single = ExtractedFields(
-                    title=str(p.get("phase_title", data.get("title", "")))[:60],
-                    rationale=str(p.get("phase_rationale", data.get("rationale", ""))),
-                    problem=str(p.get("phase_problem", data.get("problem", ""))),
-                    alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                    status_hint=str(data.get("status_hint", "")).lower(),
-                    tags=[str(t).lower() for t in p.get("tags", data.get("tags", [])) if t],
-                )
-            else:
-                single = ExtractedFields(
-                    title=str(data.get("title", ""))[:60],
-                    rationale=str(data.get("rationale", "")),
-                    problem=str(data.get("problem", "")),
-                    alternatives=[str(a) for a in data.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in data.get("trade_offs", []) if t],
-                    status_hint=str(data.get("status_hint", "")).lower(),
-                    tags=[str(t).lower() for t in data.get("tags", []) if t],
-                )
-            result = ExtractionResult(
-                group_title=single.title,
-                status_hint=single.status_hint,
-                tags=single.tags,
-                confidence=agent_confidence,
-                single=single,
-            )
-
-        return result, None
-
-    def test_single_json_parsed(self):
-        """Test single decision JSON parsing"""
-        extracted = json.dumps({
-            "tier2": {"capture": True, "reason": "Technology decision", "domain": "architecture"},
-            "title": "Adopt PostgreSQL",
-            "rationale": "Better JSON support",
-            "problem": "Need reliable database",
-            "alternatives": ["MongoDB", "MySQL"],
-            "trade_offs": ["Higher operational cost"],
-            "status_hint": "accepted",
-            "tags": ["database", "postgresql"],
-            "confidence": 0.85,
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert error is None
-        assert result is not None
-        assert result.single is not None
-        assert result.single.title == "Adopt PostgreSQL"
-        assert result.confidence == 0.85
-        assert not result.is_multi_phase
-
-    def test_multi_phase_json_parsed(self):
-        """Test multi-phase JSON parsing"""
-        extracted = json.dumps({
-            "tier2": {"capture": True, "reason": "Multi-step reasoning", "domain": "architecture"},
-            "group_title": "Database Strategy",
-            "group_type": "phase_chain",
-            "status_hint": "accepted",
-            "tags": ["database"],
-            "confidence": 0.85,
-            "phases": [
-                {
-                    "phase_title": "Requirements",
-                    "phase_decision": "Need ACID",
-                    "phase_rationale": "Production requires consistency",
-                    "phase_problem": "NoSQL limitations",
-                    "alternatives": [],
-                    "trade_offs": [],
-                    "tags": [],
-                },
-                {
-                    "phase_title": "Selection",
-                    "phase_decision": "Use PostgreSQL",
-                    "phase_rationale": "Best JSON support",
-                    "phase_problem": "Need SQL + JSON",
-                    "alternatives": ["MySQL"],
-                    "trade_offs": ["Memory"],
-                    "tags": ["postgresql"],
-                },
-            ],
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert error is None
-        assert result.is_multi_phase
-        assert len(result.phases) == 2
-        assert result.group_type == "phase_chain"
-
-    def test_bundle_json_parsed(self):
-        """Test bundle JSON parsing"""
-        extracted = json.dumps({
-            "tier2": {"capture": True, "reason": "Rich decision", "domain": "security"},
-            "group_title": "Auth Strategy",
-            "group_type": "bundle",
-            "status_hint": "accepted",
-            "tags": ["auth"],
-            "confidence": 0.90,
-            "phases": [
-                {
-                    "phase_title": "Core Decision",
-                    "phase_decision": "Use JWT",
-                    "phase_rationale": "Stateless",
-                    "phase_problem": "Auth needed",
-                    "alternatives": [],
-                    "trade_offs": [],
-                    "tags": [],
-                },
-                {
-                    "phase_title": "Alternatives",
-                    "phase_decision": "Compared options",
-                    "phase_rationale": "Sessions don't scale",
-                    "phase_problem": "",
-                    "alternatives": ["Sessions", "OAuth2"],
-                    "trade_offs": ["JWT size"],
-                    "tags": [],
-                },
-            ],
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert error is None
-        assert result.group_type == "bundle"
-        assert len(result.phases) == 2
-
-    def test_tier2_rejection(self):
-        """Test tier2 capture=false is respected"""
-        extracted = json.dumps({
-            "tier2": {"capture": False, "reason": "Casual chat", "domain": "general"},
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert result is None
-        assert "Rejected" in error
-
-    def test_invalid_json_returns_error(self):
-        """Test invalid JSON string"""
-        result, error = self._parse_extracted("not valid json at all {{{")
-
-        assert result is None
-        assert "Invalid JSON" in error
-
-    def test_confidence_clamped(self):
-        """Test confidence is clamped to 0.0-1.0"""
-        extracted = json.dumps({
-            "tier2": {"capture": True, "reason": "Test", "domain": "general"},
-            "title": "Test",
-            "confidence": 1.5,
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert error is None
-        assert result.confidence == 1.0
-
-    def test_missing_confidence_is_none(self):
-        """Test missing confidence defaults to None"""
-        extracted = json.dumps({
-            "tier2": {"capture": True, "reason": "Test", "domain": "general"},
-            "title": "Test Decision",
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert error is None
-        assert result.confidence is None
-
-    def test_agent_delegated_without_detector(self):
-        """Agent-delegated mode should not require DecisionDetector."""
-        from agents.scribe.detector import DetectionResult
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.scribe.llm_extractor import ExtractionResult, ExtractedFields
-
-        builder = RecordBuilder()
-        raw = RawEvent(
-            text="We decided to use PostgreSQL over MongoDB",
-            user="dev", channel="eng", timestamp="1711000000", source="claude_agent",
-        )
-        # Construct DetectionResult from agent data, no PatternCache needed
-        detection = DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            domain="architecture",
-            category="architecture",
-        )
-        pre_extraction = ExtractionResult(
-            group_title="Use PostgreSQL over MongoDB",
-            status_hint="accepted",
-            tags=["database", "architecture"],
-            confidence=0.85,
-            single=ExtractedFields(
-                title="Use PostgreSQL over MongoDB",
-                rationale="Better ACID compliance for financial data",
-                problem="Need reliable database for transactions",
-                alternatives=["MongoDB"],
-                trade_offs=["Less flexible schema"],
-                status_hint="accepted",
-                tags=["database"],
-            ),
-        )
-        records = builder.build_phases(raw, detection, pre_extraction=pre_extraction)
-        assert len(records) == 1
-        assert records[0].domain.value == "architecture"
-        assert records[0].quality.scribe_confidence == 0.85
-
-    def test_single_phase_treated_as_single(self):
-        """Test that phases with 1 element is treated as single record"""
-        extracted = json.dumps({
-            "tier2": {"capture": True, "reason": "Single", "domain": "architecture"},
-            "group_title": "Single Phase",
-            "phases": [
-                {
-                    "phase_title": "Only Phase",
-                    "phase_decision": "Do X",
-                    "phase_rationale": "Because Y",
-                    "phase_problem": "Problem Z",
-                },
-            ],
-            "confidence": 0.8,
-        })
-
-        result, error = self._parse_extracted(extracted)
-
-        assert error is None
-        assert not result.is_multi_phase
-        assert result.single is not None
-        assert result.single.title == "Only Phase"
-
-
-def test_reusable_insight_used_for_embedding_text():
-    """When reusable_insight is set, it should be the embedding target."""
-    from agents.common.schemas import DecisionRecord, DecisionDetail, Payload
-    from agents.common.schemas.embedding import embedding_text_for_record
-
-    record = DecisionRecord(
-        id="dec_test",
-        title="Test",
-        decision=DecisionDetail(what="Test"),
-        reusable_insight="Dense gist paragraph for embedding.",
-        payload=Payload(text="# Full markdown\n## Decision\nVerbose content"),
-    )
-    assert embedding_text_for_record(record) == "Dense gist paragraph for embedding."
-
-
-def test_embedding_text_fallback_to_payload():
-    """When reusable_insight is empty, fall back to payload.text."""
-    from agents.common.schemas import DecisionRecord, DecisionDetail, Payload
-    from agents.common.schemas.embedding import embedding_text_for_record
-
-    record = DecisionRecord(
-        id="dec_test",
-        title="Test",
-        decision=DecisionDetail(what="Test"),
-        reusable_insight="",
-        payload=Payload(text="Fallback payload text"),
-    )
-    assert embedding_text_for_record(record) == "Fallback payload text"
-
-
-def test_reusable_insight_flows_to_record():
-    """reusable_insight from agent JSON should appear on the built record."""
-    from agents.scribe.detector import DetectionResult
-    from agents.scribe.record_builder import RecordBuilder, RawEvent
-    from agents.scribe.llm_extractor import ExtractionResult, ExtractedFields
-
-    insight = "We chose PostgreSQL over MongoDB for ACID compliance in financial data."
-    builder = RecordBuilder()
-    raw = RawEvent(text="...", user="dev", channel="eng", timestamp="1711000000", source="claude_agent")
-    detection = DetectionResult(is_significant=True, confidence=0.85, domain="architecture")
-    pre_extraction = ExtractionResult(
-        group_title="PostgreSQL selection",
-        status_hint="accepted",
-        tags=["database"],
-        confidence=0.85,
-        group_summary=insight,
-        single=ExtractedFields(
-            title="PostgreSQL selection",
-            rationale="ACID compliance",
-            status_hint="accepted",
-            tags=["database"],
-        ),
-    )
-    records = builder.build_phases(raw, detection, pre_extraction=pre_extraction)
-    assert records[0].reusable_insight == insight
-
-
-def test_single_record_json_reusable_insight_wiring():
-    """reusable_insight from agent JSON must reach DecisionRecord in single-record path.
-
-    Regression test: the server.py single-record path was missing group_summary,
-    so reusable_insight was always empty for Format A captures.
-    """
-    import json
-    from agents.common.llm_utils import parse_llm_json
-    from agents.scribe.detector import DetectionResult
-    from agents.scribe.record_builder import RecordBuilder, RawEvent
-    from agents.scribe.llm_extractor import ExtractionResult, ExtractedFields
-
-    # Simulate agent JSON (Format A — single decision, no phases)
-    agent_json = {
-        "tier2": {"capture": True, "reason": "Architecture decision", "domain": "architecture"},
-        "title": "Adopt PostgreSQL",
-        "reusable_insight": "We chose PostgreSQL over MongoDB because ACID compliance is critical for financial transaction data. MongoDB was rejected due to eventual consistency risks.",
-        "rationale": "ACID compliance",
-        "problem": "Need reliable database",
-        "alternatives": ["MongoDB"],
-        "trade_offs": ["Less flexible schema"],
-        "status_hint": "accepted",
-        "tags": ["database"],
-        "confidence": 0.9,
-    }
-    data = agent_json
-
-    # Reproduce server.py single-record path (no phases or 0 phases)
-    single = ExtractedFields(
-        title=str(data.get("title", ""))[:60],
-        rationale=str(data.get("rationale", "")),
-        problem=str(data.get("problem", "")),
-        alternatives=[str(a) for a in data.get("alternatives", []) if a],
-        trade_offs=[str(t) for t in data.get("trade_offs", []) if t],
-        status_hint=str(data.get("status_hint", "")).lower(),
-        tags=[str(t).lower() for t in data.get("tags", []) if t],
-    )
-    pre_extraction = ExtractionResult(
-        group_title=single.title,
-        group_summary=str(data.get("reusable_insight", "")) or "",
-        status_hint=single.status_hint,
-        tags=single.tags,
-        confidence=0.9,
-        single=single,
-    )
-
-    builder = RecordBuilder()
-    raw = RawEvent(text="...", user="dev", channel="eng", timestamp="1711000000", source="claude_agent")
-    detection = DetectionResult(is_significant=True, confidence=0.9, domain="architecture")
-
-    records = builder.build_phases(raw, detection, pre_extraction=pre_extraction)
-    assert records[0].reusable_insight == agent_json["reusable_insight"]
-
-
-def test_embedding_text_for_metadata_dict():
-    """_embedding_text_for_record should work with metadata dicts (delete_capture path)."""
-    from agents.common.schemas.embedding import embedding_text_for_record as _embedding_text_for_record
-
-    class FakeRecord:
-        reusable_insight = "Dense gist about PostgreSQL choice."
-        class payload:
-            text = "# Full verbose markdown\n## Decision\nLong content..."
-
-    assert _embedding_text_for_record(FakeRecord()) == "Dense gist about PostgreSQL choice."
-
-    class FakeRecordLegacy:
-        reusable_insight = ""
-        class payload:
-            text = "Fallback payload text"
-
-    assert _embedding_text_for_record(FakeRecordLegacy()) == "Fallback payload text"
-
-
-def test_delete_embedding_text_selection():
-    """delete_capture should use reusable_insight from metadata for embedding."""
-    def select_delete_embedding_text(metadata, fallback_payload_text):
-        ri = metadata.get("reusable_insight", "")
-        return ri.strip() if ri and ri.strip() else fallback_payload_text
-
-    metadata_21 = {"reusable_insight": "Dense gist.", "payload": {"text": "Verbose."}}
-    assert select_delete_embedding_text(metadata_21, "Verbose.") == "Dense gist."
-
-    metadata_20 = {"payload": {"text": "Verbose."}}
-    assert select_delete_embedding_text(metadata_20, "Verbose.") == "Verbose."
-
-    metadata_empty = {"reusable_insight": "", "payload": {"text": "Verbose."}}
-    assert select_delete_embedding_text(metadata_empty, "Verbose.") == "Verbose."
diff --git a/agents/tests/test_batch_capture.py b/agents/tests/test_batch_capture.py
deleted file mode 100644
index 3eddab4..0000000
--- a/agents/tests/test_batch_capture.py
+++ /dev/null
@@ -1,308 +0,0 @@
-"""
-Tests for batch_capture MCP tool
-
-Structural and validation tests for the batch_capture input format.
-These verify item construction and validation logic without server interaction.
-Also includes integration tests that exercise batch_capture logic with mocks.
-"""
-
-import json
-import pytest
-
-def _make_item(title="Test Decision", domain="architecture", capture=True, confidence=0.85):
-    """Helper to build a single extracted item dict."""
-    return {
-        "tier2": {"capture": capture, "reason": "Test", "domain": domain},
-        "title": title,
-        "reusable_insight": f"We decided {title.lower()} because of reasons.",
-        "rationale": "Good rationale",
-        "problem": "The problem we faced",
-        "alternatives": ["Alt A"],
-        "trade_offs": ["Trade-off 1"],
-        "status_hint": "accepted",
-        "tags": ["test"],
-        "confidence": confidence,
-    }
-
-
-class TestBatchCaptureValidation:
-    """Input validation tests for batch_capture."""
-
-    def test_empty_items_returns_zero(self):
-        """Empty array should be handled gracefully."""
-        items = []
-        assert len(items) == 0
-        # A batch with no items should produce zero results
-        results = [_make_item() for _ in items]
-        assert results == []
-
-    def test_items_json_must_be_list(self):
-        """Non-list input should be detected."""
-        bad_inputs = [
-            "not a list",
-            {"title": "single dict, not wrapped in list"},
-            42,
-            None,
-        ]
-        for bad in bad_inputs:
-            assert not isinstance(bad, list), f"Expected non-list, got {type(bad)}"
-
-
-class TestBatchCaptureIntegration:
-    """Structure tests verifying item format for batch_capture."""
-
-    def test_single_item_captured(self):
-        """One novel item has correct structure."""
-        item = _make_item(title="Use PostgreSQL", domain="architecture")
-
-        assert item["tier2"]["capture"] is True
-        assert item["tier2"]["domain"] == "architecture"
-        assert item["title"] == "Use PostgreSQL"
-        assert "postgresql" in item["reusable_insight"]
-        assert item["confidence"] == 0.85
-        assert isinstance(item["alternatives"], list)
-        assert isinstance(item["trade_offs"], list)
-        assert isinstance(item["tags"], list)
-
-    def test_duplicate_item_skipped(self):
-        """Item with 'duplicate' characteristics should be identifiable."""
-        item_a = _make_item(title="Use PostgreSQL")
-        item_b = _make_item(title="Use PostgreSQL")
-
-        # Same title signals a potential duplicate
-        assert item_a["title"] == item_b["title"]
-        assert item_a["reusable_insight"] == item_b["reusable_insight"]
-
-    def test_mixed_batch_partial_success(self):
-        """Batch with 3 items: verify each has required fields."""
-        items = [
-            _make_item(title="Decision A", domain="architecture"),
-            _make_item(title="Decision B", domain="security"),
-            _make_item(title="Decision C", domain="infrastructure"),
-        ]
-
-        assert len(items) == 3
-        required_keys = {"tier2", "title", "reusable_insight", "rationale",
-                         "problem", "alternatives", "trade_offs",
-                         "status_hint", "tags", "confidence"}
-        for item in items:
-            assert required_keys.issubset(item.keys()), (
-                f"Missing keys: {required_keys - item.keys()}"
-            )
-            assert item["tier2"]["capture"] is True
-
-    def test_item_error_does_not_abort_batch(self):
-        """Batch with a bad item: the bad item is identifiable without affecting others."""
-        good_a = _make_item(title="Good Decision A")
-        bad_item = {"title": "Missing required fields"}  # no tier2, no rationale, etc.
-        good_b = _make_item(title="Good Decision B")
-
-        batch = [good_a, bad_item, good_b]
-        assert len(batch) == 3
-
-        # Good items have tier2; bad item does not
-        valid = [i for i in batch if "tier2" in i and isinstance(i["tier2"], dict)]
-        invalid = [i for i in batch if "tier2" not in i or not isinstance(i["tier2"], dict)]
-
-        assert len(valid) == 2
-        assert len(invalid) == 1
-        assert invalid[0]["title"] == "Missing required fields"
-
-    def test_rejected_item_in_batch(self):
-        """Item with tier2.capture=false should be identifiable as rejected."""
-        approved = _make_item(title="Approved Decision")
-        rejected = _make_item(title="Casual Chat", capture=False)
-
-        batch = [approved, rejected]
-
-        capturable = [i for i in batch if i["tier2"]["capture"] is True]
-        skipped = [i for i in batch if i["tier2"]["capture"] is False]
-
-        assert len(capturable) == 1
-        assert capturable[0]["title"] == "Approved Decision"
-        assert len(skipped) == 1
-        assert skipped[0]["title"] == "Casual Chat"
-
-
-class TestCaptureSingleRefactor:
-    """Verify _capture_single exists as a method on MCPServerApp."""
-
-    def test_capture_single_is_method(self):
-        """After refactor, MCPServerApp should have _capture_single."""
-        import os, sys, inspect
-        # mcp/server/ lives two levels up from agents/tests/
-        mcp_root = os.path.normpath(
-            os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "mcp")
-        )
-        if mcp_root not in sys.path:
-            sys.path.insert(0, mcp_root)
-        from server.server import MCPServerApp
-        assert hasattr(MCPServerApp, '_capture_single')
-        assert inspect.iscoroutinefunction(MCPServerApp._capture_single)
-
-
-class TestBatchCaptureTool:
-    """Test batch_capture MCP tool logic with mocks."""
-
-    @pytest.mark.asyncio
-    async def test_batch_processes_each_item(self):
-        """Verify batch iterates items and collects per-item results."""
-        items = [
-            _make_item("Novel Decision"),
-            _make_item("Another Novel"),
-        ]
-
-        # Simulate what batch_capture does: iterate and collect results
-        results = []
-        for i, item in enumerate(items):
-            # Simulate _capture_single returning success
-            result = {"ok": True, "captured": True, "record_id": f"dec_test_{i}", "novelty": {"class": "novel", "score": 0.9}}
-            results.append({"index": i, "title": item["title"], "status": "captured", "novelty": "novel"})
-
-        assert len(results) == 2
-        assert all(r["status"] == "captured" for r in results)
-
-    @pytest.mark.asyncio
-    async def test_batch_empty_returns_zero(self):
-        """Empty batch returns immediately."""
-        items_list = []
-        result = {
-            "ok": True, "total": 0, "results": [],
-            "captured": 0, "skipped": 0, "errors": 0,
-        }
-        assert result["total"] == 0
-        assert result["ok"] is True
-
-    @pytest.mark.asyncio
-    async def test_batch_invalid_json_returns_error(self):
-        """Invalid JSON string should produce an error response."""
-        bad_json = "not valid json ["
-        try:
-            items_list = json.loads(bad_json)
-            parsed = True
-        except json.JSONDecodeError:
-            parsed = False
-        assert parsed is False
-
-    @pytest.mark.asyncio
-    async def test_batch_non_list_json_returns_error(self):
-        """JSON that parses to a non-list should be rejected."""
-        non_list = json.dumps({"title": "single dict"})
-        items_list = json.loads(non_list)
-        assert not isinstance(items_list, list)
-
-    @pytest.mark.asyncio
-    async def test_batch_aggregates_counts(self):
-        """Verify captured/skipped/errors counts are computed correctly."""
-        results = [
-            {"index": 0, "title": "A", "status": "captured", "novelty": "novel"},
-            {"index": 1, "title": "B", "status": "near_duplicate", "novelty": "near_duplicate"},
-            {"index": 2, "title": "C", "status": "error", "error": "boom"},
-            {"index": 3, "title": "D", "status": "captured", "novelty": "novel"},
-            {"index": 4, "title": "E", "status": "skipped", "novelty": ""},
-        ]
-        captured = sum(1 for r in results if r["status"] == "captured")
-        skipped = sum(1 for r in results if r["status"] in ("skipped", "near_duplicate"))
-        errors = sum(1 for r in results if r["status"] == "error")
-
-        assert captured == 2
-        assert skipped == 2
-        assert errors == 1
-
-    @pytest.mark.asyncio
-    async def test_batch_error_does_not_abort_others(self):
-        """One failed item should not prevent other items from being processed."""
-        statuses = []
-        items = [
-            _make_item("Good A"),
-            {"bad": "item"},  # will fail — no title, no tier2
-            _make_item("Good B"),
-        ]
-        for i, item in enumerate(items):
-            try:
-                title = item.get("title", "") if isinstance(item, dict) else ""
-                if "tier2" not in item:
-                    raise ValueError("Missing tier2 field")
-                statuses.append("captured")
-            except Exception:
-                statuses.append("error")
-
-        assert statuses == ["captured", "error", "captured"]
-
-
-class TestBatchCaptureE2E:
-    """End-to-end smoke tests for batch_capture feature."""
-
-    def test_output_format_matches_design_spec(self):
-        """Verify return schema matches design doc specification."""
-        result = {
-            "ok": True,
-            "total": 3,
-            "results": [
-                {"index": 0, "title": "Decision A", "status": "captured", "novelty": "novel"},
-                {"index": 1, "title": "Decision B", "status": "near_duplicate", "novelty": "near_duplicate"},
-                {"index": 2, "title": "Decision C", "status": "error", "error": "Some failure"},
-            ],
-            "captured": 1,
-            "skipped": 1,
-            "errors": 1,
-        }
-
-        # Schema validation
-        assert result["ok"] is True
-        assert result["total"] == 3
-        assert result["captured"] + result["skipped"] + result["errors"] == result["total"]
-        assert len(result["results"]) == result["total"]
-
-        # Per-item required fields
-        for r in result["results"]:
-            assert "index" in r
-            assert "title" in r
-            assert "status" in r
-            assert r["status"] in ("captured", "near_duplicate", "skipped", "error")
-
-        # Error items have error field
-        error_items = [r for r in result["results"] if r["status"] == "error"]
-        for r in error_items:
-            assert "error" in r
-
-    def test_scribe_prompts_contain_batch_capture(self):
-        """Verify all scribe prompts mention batch_capture and Session-End Sweep."""
-        with open("agents/claude/scribe.md") as f:
-            claude_scribe = f.read()
-        assert "batch_capture" in claude_scribe
-        assert "Session-End Sweep" in claude_scribe
-        assert "claude_agent" in claude_scribe
-
-        with open("agents/gemini/scribe.md") as f:
-            gemini_scribe = f.read()
-        assert "batch_capture" in gemini_scribe
-        assert "Session-End Sweep" in gemini_scribe
-        assert "gemini_agent" in gemini_scribe
-
-        with open("agents/codex/scribe.md") as f:
-            codex_scribe = f.read()
-        assert "batch_capture" in codex_scribe
-        assert "Session-End Sweep" in codex_scribe
-        assert "codex_agent" in codex_scribe
-
-    def test_batch_capture_item_format_compatible_with_capture(self):
-        """Verify _make_item() produces format compatible with capture tool's extracted param."""
-        import json
-        item = _make_item("Test Compatibility")
-
-        # Must be JSON-serializable
-        serialized = json.dumps(item)
-        deserialized = json.loads(serialized)
-
-        # Required fields for agent-delegated capture
-        assert "tier2" in deserialized
-        assert "capture" in deserialized["tier2"]
-        assert "domain" in deserialized["tier2"]
-        assert "title" in deserialized
-        assert "reusable_insight" in deserialized
-        assert len(deserialized["reusable_insight"]) > 0
-        assert "status_hint" in deserialized
-        assert "confidence" in deserialized
-        assert isinstance(deserialized["confidence"], (int, float))
-        assert 0.0 <= deserialized["confidence"] <= 1.0
diff --git a/agents/tests/test_config.py b/agents/tests/test_config.py
deleted file mode 100644
index 543af89..0000000
--- a/agents/tests/test_config.py
+++ /dev/null
@@ -1,317 +0,0 @@
-"""Tests for config restructuring -- LLMConfig and migration."""
-
-import json
-import os
-import pytest
-from unittest.mock import patch
-
-
-class TestCredentialOverrideConfig:
-    """Regression tests for the reconfigure-then-reload_pipelines bug.
-
-    Previously, env vars (RUNEVAULT_TOKEN, etc.) silently overrode
-    config.json values on every load_config() call. This meant that after
-    /rune:configure wrote a new token to disk, reload_pipelines still used
-    the old token from the process environment — producing a misleading
-    "Vault key fetch failed" error instead of picking up the new credential.
-    """
-
-    @pytest.mark.parametrize("env_var,field_path,env_val,config_val", [
-        ("RUNEVAULT_TOKEN",   ("vault", "token"),             "old-token",    "new-token"),
-        ("RUNEVAULT_ENDPOINT",("vault", "endpoint"),          "tcp://old:50051", "tcp://new:50051"),
-        ("VAULT_CA_CERT",     ("vault", "ca_cert"),           "/old/ca.pem",  "/new/ca.pem"),
-        ("VAULT_TLS_DISABLE", ("vault", "tls_disable"),       "true",         False),
-    ])
-    def test_credential_not_overridden_by_env(self, tmp_path, env_var, field_path, env_val, config_val):
-        """configure-managed credentials must come from config.json, not env vars."""
-        from agents.common.config import load_config
-
-        section, field = field_path
-        config_file = tmp_path / "config.json"
-        config_file.write_text(json.dumps({section: {field: config_val}}))
-
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch.dict(os.environ, {env_var: env_val}, clear=False):
-            cfg = load_config()
-
-        actual = getattr(getattr(cfg, section), field)
-        assert actual == config_val
-
-    @pytest.mark.parametrize("section,field,old_val,new_val", [
-        ("vault",    "token",    "old-token",       "new-token"),
-        ("vault",    "endpoint", "tcp://old:50051", "tcp://new:50051"),
-        ("vault",    "ca_cert",  "/old/ca.pem",     "/new/ca.pem"),
-        ("vault",    "tls_disable", False,           True),
-    ])
-    def test_reload_picks_up_reconfigured_credential(self, tmp_path, section, field, old_val, new_val):
-        """Simulates reload_pipelines after reconfigure: second load_config() must reflect updated credential."""
-        from agents.common.config import load_config
-
-        config_file = tmp_path / "config.json"
-
-        config_file.write_text(json.dumps({section: {field: old_val}, "state": "active"}))
-        with patch("agents.common.config.CONFIG_PATH", config_file):
-            cfg1 = load_config()
-            assert getattr(getattr(cfg1, section), field) == old_val
-
-            # Simulate /rune:configure writing new credential
-            config_file.write_text(json.dumps({section: {field: new_val}, "state": "active"}))
-
-            # reload_pipelines calls load_config() again
-            cfg2 = load_config()
-            assert getattr(getattr(cfg2, section), field) == new_val
-
-
-class TestLLMConfig:
-    def test_llm_config_defaults(self):
-        from agents.common.config import LLMConfig
-        cfg = LLMConfig()
-        assert cfg.provider == "anthropic"
-        assert cfg.tier2_provider == "anthropic"
-        assert cfg.anthropic_api_key == ""
-        assert cfg.openai_tier2_model == ""
-        assert cfg.google_tier2_model == ""
-
-    def test_load_config_new_llm_section(self, tmp_path):
-        from agents.common.config import load_config
-        config_data = {
-            "llm": {
-                "provider": "openai",
-                "openai_api_key": "sk-test",
-                "openai_model": "gpt-4o",
-            },
-            "state": "active",
-        }
-        config_file = tmp_path / "config.json"
-        config_file.write_text(json.dumps(config_data))
-
-        with patch("agents.common.config.CONFIG_PATH", config_file):
-            cfg = load_config()
-
-        assert cfg.llm.provider == "openai"
-        assert cfg.llm.openai_api_key == "sk-test"
-
-    def test_load_config_migrates_from_retriever(self, tmp_path):
-        from agents.common.config import load_config
-        config_data = {
-            "retriever": {
-                "llm_provider": "openai",
-                "openai_api_key": "sk-old",
-                "openai_model": "gpt-4o-mini",
-                "topk": 10,
-            },
-            "scribe": {
-                "tier2_provider": "openai",
-            },
-            "state": "active",
-        }
-        config_file = tmp_path / "config.json"
-        config_file.write_text(json.dumps(config_data))
-
-        with patch("agents.common.config.CONFIG_PATH", config_file):
-            cfg = load_config()
-
-        assert cfg.llm.provider == "openai"
-        assert cfg.llm.openai_api_key == "sk-old"
-        assert cfg.llm.tier2_provider == "openai"
-        assert cfg.retriever.topk == 10
-
-    def test_env_var_overrides_llm_config(self, tmp_path):
-        from agents.common.config import load_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        env = {"OPENAI_API_KEY": "sk-env", "RUNE_LLM_PROVIDER": "openai"}
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch.dict(os.environ, env, clear=False):
-            cfg = load_config()
-
-        assert cfg.llm.openai_api_key == "sk-env"
-        assert cfg.llm.provider == "openai"
-
-    def test_save_config_omits_env_keys(self, tmp_path):
-        from agents.common.config import load_config, save_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        env = {"ANTHROPIC_API_KEY": "sk-from-env"}
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch("agents.common.config.CONFIG_DIR", tmp_path), \
-             patch.dict(os.environ, env, clear=False):
-            cfg = load_config()
-            save_config(cfg)
-
-        saved = json.loads(config_file.read_text())
-        assert saved.get("llm", {}).get("anthropic_api_key", "") == ""
-
-    def test_save_config_writes_llm_section(self, tmp_path):
-        """save_config should write an 'llm' section, not embed keys in retriever."""
-        from agents.common.config import load_config, save_config
-        config_data = {
-            "llm": {
-                "provider": "openai",
-                "openai_api_key": "sk-file",
-                "openai_model": "gpt-4o",
-            },
-        }
-        config_file = tmp_path / "config.json"
-        config_file.write_text(json.dumps(config_data))
-
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch("agents.common.config.CONFIG_DIR", tmp_path):
-            cfg = load_config()
-            save_config(cfg)
-
-        saved = json.loads(config_file.read_text())
-        assert "llm" in saved
-        assert saved["llm"]["provider"] == "openai"
-        assert saved["llm"]["openai_api_key"] == "sk-file"
-        # retriever section should NOT contain LLM keys
-        retriever_section = saved.get("retriever", {})
-        assert "llm_provider" not in retriever_section
-        assert "anthropic_api_key" not in retriever_section
-        assert "openai_api_key" not in retriever_section
-
-    def test_save_config_has_envector_section(self, tmp_path):
-        """save_config must write an envector section (cached from Vault bundle)."""
-        from agents.common.config import load_config, save_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch("agents.common.config.CONFIG_DIR", tmp_path):
-            cfg = load_config()
-            save_config(cfg)
-
-        saved = json.loads(config_file.read_text())
-        assert "envector" in saved, "envector section should exist in saved config"
-        assert "endpoint" in saved["envector"]
-        assert "api_key" in saved["envector"]
-
-    def test_save_config_no_retriever_llm_keys(self, tmp_path):
-        """Retriever section in saved output must not contain LLM fields."""
-        from agents.common.config import load_config, save_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch("agents.common.config.CONFIG_DIR", tmp_path):
-            cfg = load_config()
-            save_config(cfg)
-
-        saved = json.loads(config_file.read_text())
-        retriever_section = saved.get("retriever", {})
-        for key in ["llm_provider", "anthropic_api_key", "anthropic_model",
-                     "openai_api_key", "openai_model", "google_api_key", "google_model"]:
-            assert key not in retriever_section, f"{key} should not be in retriever section"
-
-    def test_save_config_no_scribe_tier2_provider(self, tmp_path):
-        """Scribe section in saved output must not contain tier2_provider."""
-        from agents.common.config import load_config, save_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch("agents.common.config.CONFIG_DIR", tmp_path):
-            cfg = load_config()
-            save_config(cfg)
-
-        saved = json.loads(config_file.read_text())
-        scribe_section = saved.get("scribe", {})
-        assert "tier2_provider" not in scribe_section
-
-    def test_rune_config_has_llm_field(self):
-        """RuneConfig should have an llm field of type LLMConfig."""
-        from agents.common.config import RuneConfig, LLMConfig
-        cfg = RuneConfig()
-        assert isinstance(cfg.llm, LLMConfig)
-
-    def test_rune_config_has_envector_field(self):
-        """RuneConfig should have an envector field (cached from Vault bundle)."""
-        from agents.common.config import RuneConfig, EnVectorConfig
-        cfg = RuneConfig()
-        assert isinstance(cfg.envector, EnVectorConfig)
-
-    def test_retriever_config_no_llm_fields(self):
-        """RetrieverConfig should not have LLM-specific fields."""
-        from agents.common.config import RetrieverConfig
-        cfg = RetrieverConfig()
-        assert not hasattr(cfg, "llm_provider")
-        assert not hasattr(cfg, "anthropic_api_key")
-        assert not hasattr(cfg, "openai_api_key")
-        assert not hasattr(cfg, "google_api_key")
-
-    def test_scribe_config_no_tier2_provider(self):
-        """ScribeConfig should not have tier2_provider."""
-        from agents.common.config import ScribeConfig
-        cfg = ScribeConfig()
-        assert not hasattr(cfg, "tier2_provider")
-
-    def test_gemini_api_key_env_var(self, tmp_path):
-        """GEMINI_API_KEY should also set google_api_key."""
-        from agents.common.config import load_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        env = {"GEMINI_API_KEY": "gem-key"}
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch.dict(os.environ, env, clear=False):
-            cfg = load_config()
-
-        assert cfg.llm.google_api_key == "gem-key"
-
-    def test_tier2_env_var_override(self, tmp_path):
-        """RUNE_TIER2_LLM_PROVIDER env var should set llm.tier2_provider."""
-        from agents.common.config import load_config
-        config_file = tmp_path / "config.json"
-        config_file.write_text("{}")
-
-        env = {"RUNE_TIER2_LLM_PROVIDER": "openai"}
-        with patch("agents.common.config.CONFIG_PATH", config_file), \
-             patch.dict(os.environ, env, clear=False):
-            cfg = load_config()
-
-        assert cfg.llm.tier2_provider == "openai"
-
-    def test_parse_llm_config_new_section(self):
-        """_parse_llm_config reads from data['llm'] when present."""
-        from agents.common.config import _parse_llm_config
-        data = {
-            "llm": {
-                "provider": "google",
-                "google_api_key": "gk-123",
-                "google_model": "gemini-pro",
-                "tier2_provider": "openai",
-            }
-        }
-        llm = _parse_llm_config(data)
-        assert llm.provider == "google"
-        assert llm.google_api_key == "gk-123"
-        assert llm.tier2_provider == "openai"
-
-    def test_parse_llm_config_fallback_to_retriever(self):
-        """_parse_llm_config falls back to retriever fields for migration."""
-        from agents.common.config import _parse_llm_config
-        data = {
-            "retriever": {
-                "llm_provider": "anthropic",
-                "anthropic_api_key": "ak-old",
-                "anthropic_model": "claude-3-haiku",
-                "openai_api_key": "ok-old",
-                "openai_model": "gpt-3.5-turbo",
-                "google_api_key": "gk-old",
-                "google_model": "gemini-1.0",
-            },
-            "scribe": {
-                "tier2_provider": "google",
-            },
-        }
-        llm = _parse_llm_config(data)
-        assert llm.provider == "anthropic"
-        assert llm.anthropic_api_key == "ak-old"
-        assert llm.anthropic_model == "claude-3-haiku"
-        assert llm.openai_api_key == "ok-old"
-        assert llm.openai_model == "gpt-3.5-turbo"
-        assert llm.google_api_key == "gk-old"
-        assert llm.google_model == "gemini-1.0"
-        assert llm.tier2_provider == "google"
diff --git a/agents/tests/test_detector.py b/agents/tests/test_detector.py
deleted file mode 100644
index f4a0fb6..0000000
--- a/agents/tests/test_detector.py
+++ /dev/null
@@ -1,201 +0,0 @@
-"""
-Tests for Decision Detector
-
-Tests pattern-based decision detection using similarity search.
-"""
-
-import pytest
-from unittest.mock import Mock, MagicMock
-
-
-class TestDetectionResult:
-    """Tests for DetectionResult dataclass"""
-
-    def test_detection_result_significant(self):
-        from agents.scribe.detector import DetectionResult
-
-        result = DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="We decided to use",
-            category="architecture",
-            domain="architecture",
-            priority="high",
-        )
-
-        assert result.is_significant is True
-        assert result.confidence == 0.85
-        assert result.matched_pattern == "We decided to use"
-
-    def test_detection_result_not_significant(self):
-        from agents.scribe.detector import DetectionResult
-
-        result = DetectionResult(
-            is_significant=False,
-            confidence=0.3,
-        )
-
-        assert result.is_significant is False
-        assert result.matched_pattern is None
-
-
-class TestDecisionDetector:
-    """Tests for DecisionDetector"""
-
-    @pytest.fixture
-    def mock_pattern_cache(self):
-        """Create a mock pattern cache"""
-        from agents.common.pattern_cache import PatternEntry
-
-        cache = Mock()
-        cache.pattern_count = 10
-
-        # Default behavior: no match
-        cache.find_best_match.return_value = (None, 0.3)
-        cache.find_top_matches.return_value = []
-
-        return cache
-
-    @pytest.fixture
-    def detector(self, mock_pattern_cache):
-        """Create detector with mock cache"""
-        from agents.scribe.detector import DecisionDetector
-
-        return DecisionDetector(
-            pattern_cache=mock_pattern_cache,
-            threshold=0.7,
-            high_confidence_threshold=0.8
-        )
-
-    def test_detect_significant_decision(self, detector, mock_pattern_cache):
-        """Test detecting a significant decision"""
-        from agents.common.pattern_cache import PatternEntry
-
-        # Mock a match
-        matched_pattern = PatternEntry(
-            text="We decided to use",
-            category="architecture",
-            priority="high",
-            embedding=[0.1] * 384,
-            domain="architecture"
-        )
-        mock_pattern_cache.find_best_match.return_value = (matched_pattern, 0.85)
-
-        result = detector.detect("We decided to use PostgreSQL for better JSON support")
-
-        assert result.is_significant is True
-        assert result.confidence == 0.85
-        assert result.matched_pattern == "We decided to use"
-        assert result.category == "architecture"
-
-    def test_detect_not_significant(self, detector, mock_pattern_cache):
-        """Test detecting a non-significant message"""
-        mock_pattern_cache.find_best_match.return_value = (None, 0.3)
-
-        result = detector.detect("Good morning everyone!")
-
-        assert result.is_significant is False
-        assert result.confidence == 0.3
-
-    def test_detect_empty_text(self, detector):
-        """Test with empty text"""
-        result = detector.detect("")
-
-        assert result.is_significant is False
-        assert result.confidence == 0.0
-
-    def test_detect_short_text(self, detector):
-        """Test with very short text"""
-        result = detector.detect("Hi")
-
-        assert result.is_significant is False
-
-    def test_should_auto_capture_high_confidence(self, detector, mock_pattern_cache):
-        """Test auto-capture for high confidence results"""
-        from agents.scribe.detector import DetectionResult
-
-        result = DetectionResult(
-            is_significant=True,
-            confidence=0.9,
-            matched_pattern="We decided to",
-            priority="high",
-        )
-
-        assert detector.should_auto_capture(result) is True
-
-    def test_should_not_auto_capture_low_confidence(self, detector):
-        """Test no auto-capture for low confidence"""
-        from agents.scribe.detector import DetectionResult
-
-        result = DetectionResult(
-            is_significant=True,
-            confidence=0.75,  # Below high_confidence_threshold
-            matched_pattern="We decided to",
-            priority="high",
-        )
-
-        assert detector.should_auto_capture(result) is False
-
-    def test_needs_review_moderate_confidence(self, detector):
-        """Test review needed for moderate confidence"""
-        from agents.scribe.detector import DetectionResult
-
-        result = DetectionResult(
-            is_significant=True,
-            confidence=0.75,
-            matched_pattern="After discussion",
-            priority="medium",
-        )
-
-        assert detector.needs_review(result) is True
-
-    def test_explain_detection(self, detector):
-        """Test detection explanation"""
-        from agents.scribe.detector import DetectionResult
-
-        result = DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="We decided to use",
-            category="architecture",
-            domain="architecture",
-            priority="high",
-        )
-
-        explanation = detector.explain_detection(result)
-
-        assert "Significant decision detected" in explanation
-        assert "We decided to use" in explanation
-        assert "architecture" in explanation
-
-
-class TestPatternMatching:
-    """Integration tests for pattern matching (requires embedding service)"""
-
-    @pytest.mark.skip(reason="Requires embedding service - run manually")
-    def test_real_pattern_matching(self):
-        """Test with real embeddings"""
-        from agents.common.embedding_service import EmbeddingService
-        from agents.common.pattern_cache import PatternCache
-        from agents.scribe.detector import DecisionDetector
-        from agents.scribe.pattern_parser import get_builtin_patterns
-
-        # Initialize
-        embedding = EmbeddingService()
-        cache = PatternCache(embedding)
-        cache.load_patterns(get_builtin_patterns())
-
-        detector = DecisionDetector(cache, threshold=0.7)
-
-        # Test decision text
-        result = detector.detect(
-            "We decided to use PostgreSQL instead of MySQL because of better JSON support"
-        )
-
-        assert result.is_significant is True
-        assert result.confidence > 0.7
-
-        # Test routine text
-        result = detector.detect("Good morning team!")
-
-        assert result.is_significant is False
diff --git a/agents/tests/test_language.py b/agents/tests/test_language.py
deleted file mode 100644
index c95ed5e..0000000
--- a/agents/tests/test_language.py
+++ /dev/null
@@ -1,171 +0,0 @@
-"""
-Tests for Language Detection Service
-
-Tests language detection, Unicode script fallback, and LanguageInfo properties.
-"""
-
-import pytest
-
-
-class TestLanguageInfo:
-    """Tests for LanguageInfo dataclass"""
-
-    def test_english_language_info(self):
-        from agents.common.language import LanguageInfo
-
-        info = LanguageInfo(code="en", confidence=0.99, script="Latin")
-
-        assert info.is_english is True
-        assert info.needs_llm_extraction is False
-
-    def test_korean_language_info(self):
-        from agents.common.language import LanguageInfo
-
-        info = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-
-        assert info.is_english is False
-        assert info.needs_llm_extraction is True
-
-    def test_japanese_language_info(self):
-        from agents.common.language import LanguageInfo
-
-        info = LanguageInfo(code="ja", confidence=0.90, script="Kana")
-
-        assert info.is_english is False
-        assert info.needs_llm_extraction is True
-
-    def test_language_info_is_frozen(self):
-        from agents.common.language import LanguageInfo
-
-        info = LanguageInfo(code="en", confidence=1.0, script="Latin")
-
-        with pytest.raises(AttributeError):
-            info.code = "ko"
-
-
-class TestDetectLanguage:
-    """Tests for detect_language function"""
-
-    def test_detect_english(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("We decided to use PostgreSQL instead of MySQL")
-
-        assert result.code == "en"
-        assert result.is_english is True
-        assert result.confidence > 0.0
-
-    def test_detect_korean(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("PostgreSQL을 사용하기로 결정했다. 팀에서 논의한 결과 성능이 더 좋기 때문이다.")
-
-        assert result.code == "ko"
-        assert result.is_english is False
-        assert result.script == "Hangul"
-
-    def test_detect_japanese(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("PostgreSQLを使うことに決めた。チームで議論した結果、パフォーマンスが良いからだ。")
-
-        assert result.code == "ja"
-        assert result.is_english is False
-        assert result.script in ("Kana", "CJK")
-
-    def test_empty_text_defaults_to_english(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("")
-        assert result.code == "en"
-
-        result = detect_language("   ")
-        assert result.code == "en"
-
-    def test_short_text_defaults_to_english(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("Hello")
-        assert result.code == "en"
-
-    def test_short_korean_detected_by_script(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("결정했다")
-
-        # Even short text should detect Korean via Unicode script
-        assert result.code == "ko"
-        assert result.script == "Hangul"
-
-    def test_short_japanese_detected_by_script(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("決めた")
-
-        # Should detect CJK/Japanese via script
-        assert result.code in ("ja", "zh")
-
-    def test_mixed_english_korean(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("PostgreSQL을 사용하기로 결정했습니다. 이유는 JSON 지원이 좋기 때문입니다.")
-
-        # Should detect Korean (Hangul is dominant script)
-        assert result.code == "ko"
-
-    def test_confidence_range(self):
-        from agents.common.language import detect_language
-
-        result = detect_language("We decided to use PostgreSQL because of better JSON support")
-
-        assert 0.0 <= result.confidence <= 1.0
-
-    def test_none_input(self):
-        from agents.common.language import detect_language
-
-        result = detect_language(None)
-        assert result.code == "en"
-
-
-class TestDetectScript:
-    """Tests for Unicode script detection fallback"""
-
-    def test_hangul_script(self):
-        from agents.common.language import _detect_script
-
-        script, lang = _detect_script("한글 테스트입니다")
-
-        assert script == "Hangul"
-        assert lang == "ko"
-
-    def test_kana_script(self):
-        from agents.common.language import _detect_script
-
-        script, lang = _detect_script("テストです")
-
-        assert script == "Kana"
-        assert lang == "ja"
-
-    def test_latin_script(self):
-        from agents.common.language import _detect_script
-
-        script, lang = _detect_script("Hello World")
-
-        assert script == "Latin"
-        assert lang is None
-
-    def test_cjk_with_kana_is_japanese(self):
-        from agents.common.language import _detect_script
-
-        # Japanese text with both kanji and hiragana
-        script, lang = _detect_script("決定した理由は")
-
-        assert lang == "ja" or script in ("Kana", "CJK")
-
-    def test_empty_text(self):
-        from agents.common.language import _detect_script
-
-        script, lang = _detect_script("")
-
-        assert script == "Latin"
-        assert lang is None
diff --git a/agents/tests/test_llm_client.py b/agents/tests/test_llm_client.py
deleted file mode 100644
index 23a631d..0000000
--- a/agents/tests/test_llm_client.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""Tests for LLMClient provider abstraction."""
-
-import pytest
-from agents.common.llm_client import LLMClient
-
-
-class TestLLMClientInit:
-    def test_missing_anthropic_key_logs_info(self, caplog):
-        import logging
-        with caplog.at_level(logging.INFO, logger="rune.common.llm_client"):
-            client = LLMClient(provider="anthropic")
-        assert not client.is_available
-        assert "API key not provided" in caplog.text
-
-    def test_missing_openai_key_logs_info(self, caplog):
-        import logging
-        with caplog.at_level(logging.INFO, logger="rune.common.llm_client"):
-            client = LLMClient(provider="openai")
-        assert not client.is_available
-        assert "API key not provided" in caplog.text
-
-    def test_missing_google_key_logs_info(self, caplog):
-        import logging
-        with caplog.at_level(logging.INFO, logger="rune.common.llm_client"):
-            client = LLMClient(provider="google")
-        assert not client.is_available
-        assert "API key not provided" in caplog.text
-
-    def test_auto_provider_raises(self):
-        with pytest.raises(ValueError, match="auto"):
-            LLMClient(provider="auto")
-
-    def test_unsupported_provider_logs_warning(self, caplog):
-        import logging
-        with caplog.at_level(logging.WARNING, logger="rune.common.llm_client"):
-            client = LLMClient(provider="unsupported_xyz")
-        assert not client.is_available
-        assert "Unsupported" in caplog.text
-
-
-class TestLLMClientGenerate:
-    def test_generate_raises_when_unavailable(self):
-        client = LLMClient(provider="anthropic")
-        with pytest.raises(RuntimeError, match="not available"):
-            client.generate("test")
diff --git a/agents/tests/test_llm_utils.py b/agents/tests/test_llm_utils.py
deleted file mode 100644
index 2d57bf3..0000000
--- a/agents/tests/test_llm_utils.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""Tests for shared LLM response parsing utilities."""
-
-import pytest
-from agents.common.llm_utils import parse_llm_json
-
-
-class TestParseLlmJson:
-    def test_valid_json(self):
-        assert parse_llm_json('{"key": "value"}') == {"key": "value"}
-
-    def test_json_with_markdown_fences(self):
-        raw = '```json\n{"capture": true, "reason": "test"}\n```'
-        result = parse_llm_json(raw)
-        assert result == {"capture": True, "reason": "test"}
-
-    def test_json_with_plain_fences(self):
-        raw = '```\n{"a": 1}\n```'
-        assert parse_llm_json(raw) == {"a": 1}
-
-    def test_json_embedded_in_text(self):
-        raw = 'Here is the result: {"key": "value"} and some trailing text.'
-        assert parse_llm_json(raw) == {"key": "value"}
-
-    def test_no_json_returns_empty_dict(self):
-        assert parse_llm_json("This is not JSON at all") == {}
-
-    def test_empty_string_returns_empty_dict(self):
-        assert parse_llm_json("") == {}
-
-    def test_nested_json(self):
-        raw = '{"phases": [{"title": "A"}, {"title": "B"}]}'
-        result = parse_llm_json(raw)
-        assert len(result["phases"]) == 2
-
-    def test_invalid_json_with_braces_returns_empty(self):
-        raw = '{"broken: json'
-        assert parse_llm_json(raw) == {}
diff --git a/agents/tests/test_novelty_check.py b/agents/tests/test_novelty_check.py
deleted file mode 100644
index 295d612..0000000
--- a/agents/tests/test_novelty_check.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""Tests for novelty check logic."""
-import pytest
-
-
-def test_classify_novel():
-    """Score below NOVEL_THRESHOLD = novel."""
-    from agents.common.schemas.embedding import classify_novelty
-    result = classify_novelty(max_similarity=0.2, threshold_novel=0.3, threshold_related=0.7)
-    assert result["class"] == "novel"
-    assert result["score"] == pytest.approx(0.8)  # 1 - 0.2
-
-
-def test_classify_evolution():
-    """Score between novel and related thresholds = evolution."""
-    from agents.common.schemas.embedding import classify_novelty
-    result = classify_novelty(max_similarity=0.5, threshold_novel=0.3, threshold_related=0.7)
-    assert result["class"] == "evolution"
-    assert result["score"] == pytest.approx(0.5)
-
-
-def test_classify_related():
-    """Score between related and near_duplicate thresholds = related (annotation only)."""
-    from agents.common.schemas.embedding import classify_novelty
-    result = classify_novelty(max_similarity=0.85, threshold_novel=0.3, threshold_related=0.7, threshold_near_duplicate=0.95)
-    assert result["class"] == "related"
-    assert result["score"] == pytest.approx(0.15)
-
-
-def test_classify_near_duplicate():
-    """Score >= near_duplicate threshold = near_duplicate (blocks capture)."""
-    from agents.common.schemas.embedding import classify_novelty
-    result = classify_novelty(max_similarity=0.97, threshold_novel=0.3, threshold_related=0.7, threshold_near_duplicate=0.95)
-    assert result["class"] == "near_duplicate"
-    assert result["score"] == pytest.approx(0.03)
-
-
-def test_classify_empty_memory():
-    """No existing records = max novelty."""
-    from agents.common.schemas.embedding import classify_novelty
-    result = classify_novelty(max_similarity=0.0, threshold_novel=0.3, threshold_related=0.7)
-    assert result["class"] == "novel"
-    assert result["score"] == pytest.approx(1.0)
diff --git a/agents/tests/test_pattern_parser.py b/agents/tests/test_pattern_parser.py
deleted file mode 100644
index 81536f1..0000000
--- a/agents/tests/test_pattern_parser.py
+++ /dev/null
@@ -1,312 +0,0 @@
-"""
-Tests for Pattern Parser
-
-Tests for parsing capture-triggers.md into structured patterns.
-"""
-
-import pytest
-from pathlib import Path
-from unittest.mock import patch
-
-
-class TestPatternParser:
-    """Tests for pattern parsing functionality"""
-
-    def test_parse_simple_markdown(self, tmp_path):
-        """Test parsing a simple markdown file"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        md_content = """# Capture Triggers
-
-## Architecture Decisions
-
-### High-Priority
-
-- "We decided to use X instead of Y"
-- "Let's go with option A"
-
-### Medium-Priority
-
-- "After discussion, we chose"
-"""
-        md_file = tmp_path / "test_triggers.md"
-        md_file.write_text(md_content)
-
-        patterns = parse_capture_triggers(str(md_file))
-
-        assert len(patterns) >= 2
-        assert any("We decided to use" in p["text"] for p in patterns)
-
-    def test_parse_with_categories(self, tmp_path):
-        """Test that category headers are correctly parsed"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        md_content = """# Triggers
-
-## Security Compliance
-
-- "For compliance, we need to implement this policy"
-
-## Product Decisions
-
-- "We decided to prioritize feature X because customer demand"
-"""
-        md_file = tmp_path / "test_triggers.md"
-        md_file.write_text(md_content)
-
-        patterns = parse_capture_triggers(str(md_file))
-
-        security_patterns = [p for p in patterns if p["domain"] == "security"]
-        product_patterns = [p for p in patterns if p["domain"] == "product"]
-
-        assert len(security_patterns) >= 1
-        assert len(product_patterns) >= 1
-
-    def test_parse_priority_detection(self, tmp_path):
-        """Test that priority is correctly detected from section headers"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        md_content = """# Triggers
-
-## Architecture
-
-### High_Confidence Triggers
-
-- "Critical architecture decision"
-
-### Medium_Confidence Triggers
-
-- "Moderate importance choice"
-"""
-        md_file = tmp_path / "test_triggers.md"
-        md_file.write_text(md_content)
-
-        patterns = parse_capture_triggers(str(md_file))
-
-        high_priority = [p for p in patterns if p["priority"] == "high"]
-        medium_priority = [p for p in patterns if p["priority"] == "medium"]
-
-        assert len(high_priority) >= 1
-        assert len(medium_priority) >= 1
-
-    def test_domain_inference(self):
-        """Test domain inference from category names"""
-        from agents.scribe.pattern_parser import _infer_domain
-
-        assert _infer_domain("architecture_decisions") == "architecture"
-        assert _infer_domain("security_compliance") == "security"
-        assert _infer_domain("product_features") == "product"
-        assert _infer_domain("executive_strategy") == "exec"
-        assert _infer_domain("operations_deployment") == "ops"
-        assert _infer_domain("random_category") == "general"
-
-    def test_normalize_category(self):
-        """Test category normalization"""
-        from agents.scribe.pattern_parser import _normalize_category
-
-        assert _normalize_category("Architecture Decisions") == "architecture_decisions"
-        assert _normalize_category("Security & Compliance") == "security_compliance"
-        assert _normalize_category("  Spaces  Here  ") == "spaces_here"
-
-    def test_parse_backtick_patterns(self, tmp_path):
-        """Test parsing patterns in backticks"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        md_content = """# Triggers
-
-## Technical
-
-- `We need to implement this feature`
-- `The system should handle X`
-"""
-        md_file = tmp_path / "test_triggers.md"
-        md_file.write_text(md_content)
-
-        patterns = parse_capture_triggers(str(md_file))
-
-        assert len(patterns) >= 1
-        assert any("implement" in p["text"].lower() for p in patterns)
-
-    def test_skip_short_patterns(self, tmp_path):
-        """Test that very short patterns are skipped"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        md_content = """# Triggers
-
-## General
-
-- "Hi"
-- "OK"
-- "This is a valid pattern to capture"
-"""
-        md_file = tmp_path / "test_triggers.md"
-        md_file.write_text(md_content)
-
-        patterns = parse_capture_triggers(str(md_file))
-
-        # Short patterns should be skipped
-        assert not any(p["text"] == "Hi" for p in patterns)
-        assert not any(p["text"] == "OK" for p in patterns)
-
-    def test_remove_duplicates(self, tmp_path):
-        """Test that duplicate patterns are removed"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        md_content = """# Triggers
-
-## Category1
-
-- "Same pattern here"
-
-## Category2
-
-- "Same pattern here"
-"""
-        md_file = tmp_path / "test_triggers.md"
-        md_file.write_text(md_content)
-
-        patterns = parse_capture_triggers(str(md_file))
-
-        # Should only have one of the duplicates
-        same_patterns = [p for p in patterns if "Same pattern here" in p["text"]]
-        assert len(same_patterns) == 1
-
-    def test_file_not_found(self):
-        """Test that FileNotFoundError is raised for missing file"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        with pytest.raises(FileNotFoundError):
-            parse_capture_triggers("/nonexistent/path/triggers.md")
-
-
-class TestBuiltinPatterns:
-    """Tests for builtin fallback patterns"""
-
-    def test_builtin_patterns_available(self):
-        """Test that builtin patterns are available"""
-        from agents.scribe.pattern_parser import get_builtin_patterns
-
-        patterns = get_builtin_patterns()
-
-        assert len(patterns) > 0
-        assert all("text" in p for p in patterns)
-        assert all("category" in p for p in patterns)
-        assert all("priority" in p for p in patterns)
-        assert all("domain" in p for p in patterns)
-
-    def test_builtin_patterns_have_high_priority(self):
-        """Test that builtin patterns include high priority ones"""
-        from agents.scribe.pattern_parser import get_builtin_patterns
-
-        patterns = get_builtin_patterns()
-        high_priority = [p for p in patterns if p["priority"] == "high"]
-
-        assert len(high_priority) > 0
-
-    def test_builtin_patterns_cover_domains(self):
-        """Test that builtin patterns cover multiple domains"""
-        from agents.scribe.pattern_parser import get_builtin_patterns
-
-        patterns = get_builtin_patterns()
-        domains = set(p["domain"] for p in patterns)
-
-        assert "architecture" in domains
-        assert "security" in domains
-        assert "product" in domains
-
-
-class TestLoadDefaultPatterns:
-    """Tests for loading default patterns"""
-
-    def test_load_default_falls_back_to_builtin(self):
-        """Test that load_default_patterns falls back to builtin"""
-        from agents.scribe.pattern_parser import load_default_patterns, get_builtin_patterns
-
-        # Even if file doesn't exist, should return builtin patterns
-        patterns = load_default_patterns()
-
-        assert len(patterns) > 0
-        # Should return at least builtin patterns
-        builtin_count = len(get_builtin_patterns())
-        assert len(patterns) >= builtin_count or len(patterns) > 0
-
-
-class TestLoadAllLanguagePatterns:
-    """Tests for multilingual pattern loading"""
-
-    def test_load_all_includes_base_english(self, tmp_path):
-        """Test that base English patterns are loaded with language='en'"""
-        from agents.scribe.pattern_parser import load_all_language_patterns
-
-        patterns = load_all_language_patterns()
-
-        en_patterns = [p for p in patterns if p.get("language") == "en"]
-        assert len(en_patterns) > 0
-
-    def test_load_all_discovers_language_files(self, tmp_path):
-        """Test that language-specific files are discovered via glob"""
-        from agents.scribe.pattern_parser import parse_capture_triggers, load_all_language_patterns
-
-        patterns = load_all_language_patterns()
-
-        # Should find ko and ja patterns from the project patterns/ directory
-        ko_patterns = [p for p in patterns if p.get("language") == "ko"]
-        ja_patterns = [p for p in patterns if p.get("language") == "ja"]
-
-        assert len(ko_patterns) > 0, "Korean patterns should be loaded"
-        assert len(ja_patterns) > 0, "Japanese patterns should be loaded"
-
-    def test_language_field_attached(self):
-        """Test that every pattern has a language field"""
-        from agents.scribe.pattern_parser import load_all_language_patterns
-
-        patterns = load_all_language_patterns()
-
-        for p in patterns:
-            assert "language" in p, f"Pattern missing language field: {p['text'][:30]}"
-            assert p["language"] in ("en", "ko", "ja"), f"Unexpected language: {p['language']}"
-
-    def test_total_count_greater_than_english_only(self):
-        """Test that multilingual loading yields more patterns than English-only"""
-        from agents.scribe.pattern_parser import load_default_patterns, load_all_language_patterns
-
-        en_only = load_default_patterns()
-        all_langs = load_all_language_patterns()
-
-        assert len(all_langs) > len(en_only)
-
-    def test_load_all_with_custom_files(self, tmp_path):
-        """Test glob discovery with custom pattern files"""
-        from agents.scribe.pattern_parser import parse_capture_triggers
-
-        # Create a base file
-        base_md = tmp_path / "capture-triggers.md"
-        base_md.write_text("""# Triggers
-
-## Architecture
-- "We decided to use X"
-""")
-
-        # Create a Korean file
-        ko_md = tmp_path / "capture-triggers.ko.md"
-        ko_md.write_text("""# 트리거
-
-## 아키텍처
-- "X를 사용하기로 결정했다"
-""")
-
-        # Parse both files
-        base_patterns = parse_capture_triggers(str(base_md))
-        ko_patterns = parse_capture_triggers(str(ko_md))
-
-        assert len(base_patterns) >= 1
-        assert len(ko_patterns) >= 1
-
-    def test_fallback_when_no_files(self):
-        """Test that builtin patterns are returned when no files exist"""
-        from agents.scribe.pattern_parser import get_builtin_patterns, load_all_language_patterns
-
-        # load_all_language_patterns should at least return builtin patterns
-        # even when invoked (since the real patterns/ dir exists in the project)
-        patterns = load_all_language_patterns()
-        assert len(patterns) > 0
diff --git a/agents/tests/test_pipeline_scenario.py b/agents/tests/test_pipeline_scenario.py
deleted file mode 100644
index 3e98353..0000000
--- a/agents/tests/test_pipeline_scenario.py
+++ /dev/null
@@ -1,1141 +0,0 @@
-"""
-3-Person Team Pipeline Scenario Tests
-
-Tests the full 3-tier Scribe capture pipeline and Retriever recall with
-realistic conversation scripts from 3 team members:
-- Alice (CTO): Architecture & infrastructure decisions
-- Bob (EM): Sprint planning, feature prioritization, deployment
-- Charlie (Security Lead): Security policies, compliance, encryption
-
-Each conversation includes:
-- CAPTURE: Real decisions with rationale (should pass all 3 tiers)
-- REJECT: Casual chat, status updates, vague opinions (should be filtered)
-- BORDERLINE: Triggers Tier 1 but Tier 2 should reject (false positives)
-
-Pipeline:
-  Tier 1: Embedding similarity (local, 0 tokens) → wide net
-  Tier 2: LLM policy filter (Haiku, ~200 tokens) → false positive removal
-  Tier 3: LLM extraction (Sonnet, ~500 tokens) → Decision Record building
-"""
-
-import json
-import pytest
-from datetime import datetime, timezone
-from unittest.mock import Mock, MagicMock, patch
-from dataclasses import dataclass
-from typing import List, Optional
-
-
-# ============================================================================
-# Conversation Scripts
-# ============================================================================
-
-@dataclass
-class ConversationLine:
-    """A single line in a conversation script"""
-    text: str
-    user: str
-    channel: str
-    expected_capture: bool  # Should this be captured?
-    expected_domain: Optional[str] = None  # Expected domain if captured
-    category: str = ""  # "decision", "casual", "status", "vague", "borderline"
-    note: str = ""  # Why this is expected to capture/reject
-
-
-# Alice: CTO / Architecture Lead
-ALICE_CONVERSATIONS: List[ConversationLine] = [
-    # --- Real decisions (CAPTURE) ---
-    ConversationLine(
-        text='We decided to use PostgreSQL instead of MongoDB for the main database because we need strong ACID compliance for financial transactions. "PostgreSQL gives us ACID compliance out of the box" said the lead architect. The trade-off is that we lose flexible schema support.',
-        user="alice", channel="#architecture",
-        expected_capture=True, expected_domain="architecture",
-        category="decision",
-        note="Clear decision with rationale, quote, and trade-off",
-    ),
-    ConversationLine(
-        text='After extensive benchmarking, we are going with gRPC for inter-service communication. REST showed 3x higher latency for our use case. We will use Protocol Buffers for schema evolution. The key risk is that browser clients cannot call gRPC directly, so we need a gateway.',
-        user="alice", channel="#architecture",
-        expected_capture=True, expected_domain="architecture",
-        category="decision",
-        note="Technology choice with benchmark data and risk assessment",
-    ),
-    ConversationLine(
-        text='The team has agreed to adopt an event-driven architecture using Kafka for all inter-service communication. "Event sourcing lets us replay state when debugging production issues" was the consensus. Direct API calls between services are now banned except for synchronous reads.',
-        user="alice", channel="#architecture",
-        expected_capture=True, expected_domain="architecture",
-        category="decision",
-        note="Policy establishment with quoted consensus",
-    ),
-    ConversationLine(
-        text='We standardized on TypeScript for all frontend code effective immediately. Our policy is to enforce strict mode and use Zod for runtime validation at API boundaries. No exceptions for new projects.',
-        user="alice", channel="#engineering",
-        expected_capture=True, expected_domain="architecture",
-        category="decision",
-        note="Technology standardization policy",
-    ),
-    ConversationLine(
-        text='After the postmortem on last week\'s outage, we learned that our retry logic was causing cascading failures. The lesson: always implement circuit breakers before adding retries. We are adopting the Resilience4j library for this.',
-        user="alice", channel="#incidents",
-        expected_capture=True, expected_domain="ops",
-        category="decision",
-        note="Lesson learned from incident with concrete action",
-    ),
-
-    # --- Casual / Status (REJECT) ---
-    ConversationLine(
-        text="Good morning team! Hope everyone had a great weekend.",
-        user="alice", channel="#general",
-        expected_capture=False,
-        category="casual",
-        note="Social greeting",
-    ),
-    ConversationLine(
-        text="I'm heading to the dentist after lunch, will be back around 3pm.",
-        user="alice", channel="#general",
-        expected_capture=False,
-        category="casual",
-        note="Personal schedule update",
-    ),
-    ConversationLine(
-        text="The CI build is currently broken on main. Looking into it.",
-        user="alice", channel="#engineering",
-        expected_capture=False,
-        category="status",
-        note="Status update without decision",
-    ),
-    ConversationLine(
-        text="Updated the README with the new setup instructions.",
-        user="alice", channel="#engineering",
-        expected_capture=False,
-        category="status",
-        note="Routine task completion",
-    ),
-
-    # --- Borderline / False Positives (REJECT by Tier 2) ---
-    ConversationLine(
-        text="We decided to order pizza for the team lunch today. Hawaiian vs pepperoni was a tough call but pepperoni won.",
-        user="alice", channel="#random",
-        expected_capture=False,
-        category="borderline",
-        note="Contains 'We decided' pattern but is food order, not org decision",
-    ),
-    ConversationLine(
-        text="Maybe we should consider using Rust sometime. It might be faster than Go for some things.",
-        user="alice", channel="#architecture",
-        expected_capture=False,
-        category="vague",
-        note="Vague opinion without commitment ('maybe', 'sometime', 'might')",
-    ),
-    ConversationLine(
-        text="I think Python is better than Java but that's just my personal preference honestly.",
-        user="alice", channel="#random",
-        expected_capture=False,
-        category="vague",
-        note="Personal opinion without team decision",
-    ),
-]
-
-
-# Bob: Engineering Manager / Sprint Lead
-BOB_CONVERSATIONS: List[ConversationLine] = [
-    # --- Real decisions (CAPTURE) ---
-    ConversationLine(
-        text='For this sprint, we have prioritized the authentication refactoring because it blocks three other features. "The current OAuth implementation does not support PKCE and that is a security risk" said the security lead. We are allocating 2 engineers full-time.',
-        user="bob", channel="#sprint-planning",
-        expected_capture=True, expected_domain="product",
-        category="decision",
-        note="Sprint prioritization with blocking rationale and resource allocation",
-    ),
-    ConversationLine(
-        text='We decided to adopt feature flags using LaunchDarkly instead of building our own system. The business case is clear: we need gradual rollouts for enterprise customers who require zero-downtime deployments. Build vs buy analysis showed 6 months of eng time saved.',
-        user="bob", channel="#engineering",
-        expected_capture=True, expected_domain="product",
-        category="decision",
-        note="Build-vs-buy decision with quantified business case",
-    ),
-    ConversationLine(
-        text='New deployment policy: all changes must go through blue-green deployments. "Deploy to staging before production, and all changes must pass automated smoke tests" is the new mandate. This reduces rollback time from hours to seconds.',
-        user="bob", channel="#devops",
-        expected_capture=True, expected_domain="ops",
-        category="decision",
-        note="New deployment policy with quantified improvement",
-    ),
-    ConversationLine(
-        text='Performance bottleneck identified and resolved: the user dashboard query took 3.2 seconds because of N+1 queries. We are going with DataLoader pattern to batch database calls. "This should bring it under 200ms" per our benchmarks. All teams must use DataLoader for list endpoints going forward.',
-        user="bob", channel="#performance",
-        expected_capture=True, expected_domain="architecture",
-        category="decision",
-        note="Technical decision with benchmark data and new team policy",
-    ),
-    ConversationLine(
-        text='We are deprioritizing the mobile app redesign for Q2. Customer feedback analysis shows enterprise clients care 3x more about API stability than UI polish. The lesson learned: always validate assumptions with data before committing to large projects.',
-        user="bob", channel="#product",
-        expected_capture=True, expected_domain="product",
-        category="decision",
-        note="Prioritization decision with data-backed rationale and lesson",
-    ),
-
-    # --- Casual / Status (REJECT) ---
-    ConversationLine(
-        text="Hey everyone, standup in 5 minutes!",
-        user="bob", channel="#engineering",
-        expected_capture=False,
-        category="casual",
-        note="Meeting reminder",
-    ),
-    ConversationLine(
-        text="Still working on the sprint velocity report. Should have it by EOD.",
-        user="bob", channel="#sprint-planning",
-        expected_capture=False,
-        category="status",
-        note="Status update without decision content",
-    ),
-    ConversationLine(
-        text="Merged the PR for the login page fix. Tests are green.",
-        user="bob", channel="#engineering",
-        expected_capture=False,
-        category="status",
-        note="Routine PR merge notification",
-    ),
-    ConversationLine(
-        text="Reminder: please update your Jira tickets before the end of the sprint.",
-        user="bob", channel="#sprint-planning",
-        expected_capture=False,
-        category="casual",
-        note="Administrative reminder",
-    ),
-
-    # --- Borderline / False Positives (REJECT by Tier 2) ---
-    ConversationLine(
-        text="We decided to move the team offsite to next Thursday because the conference room is booked on Wednesday.",
-        user="bob", channel="#general",
-        expected_capture=False,
-        category="borderline",
-        note="Contains 'We decided' but is scheduling, not org decision",
-    ),
-    ConversationLine(
-        text="I read an article about how Netflix uses chaos engineering. Interesting approach, we should look into that someday.",
-        user="bob", channel="#engineering",
-        expected_capture=False,
-        category="vague",
-        note="Information sharing without decision or commitment",
-    ),
-]
-
-
-# Charlie: Security Lead
-CHARLIE_CONVERSATIONS: List[ConversationLine] = [
-    # --- Real decisions (CAPTURE) ---
-    ConversationLine(
-        text='We must implement mTLS for all inter-service communication by end of Q2. "The security review flagged this as requiring immediate attention" due to our SOC2 compliance audit. All services must present valid certificates issued by our internal CA.',
-        user="charlie", channel="#security",
-        expected_capture=True, expected_domain="security",
-        category="decision",
-        note="Security mandate with compliance deadline and technical requirement",
-    ),
-    ConversationLine(
-        text='Our authentication approach is moving to OIDC with Auth0 as the identity provider. All API keys must be rotated every 90 days and stored in HashiCorp Vault. "No more hardcoded secrets in config files" is the mandate. Violations will block deploys.',
-        user="charlie", channel="#security",
-        expected_capture=True, expected_domain="security",
-        category="decision",
-        note="Authentication policy with enforcement mechanism",
-    ),
-    ConversationLine(
-        text='The encryption strategy is AES-256-GCM for data at rest and TLS 1.3 for data in transit. We chose this combination because it meets both HIPAA and PCI DSS requirements simultaneously. The trade-off: AES-256-GCM has slightly higher CPU overhead than AES-128, but compliance trumps performance here.',
-        user="charlie", channel="#security",
-        expected_capture=True, expected_domain="security",
-        category="decision",
-        note="Encryption policy with compliance rationale and trade-off analysis",
-    ),
-    ConversationLine(
-        text='After the vulnerability disclosure last week, we are implementing a mandatory security review for all PRs that touch authentication, payment, or PII handling code. The review must be completed by a security-certified engineer before merge. No exceptions.',
-        user="charlie", channel="#security",
-        expected_capture=True, expected_domain="security",
-        category="decision",
-        note="New security review policy triggered by incident",
-    ),
-    ConversationLine(
-        text='The security team has decided to implement rate limiting at the API gateway level using Kong. We chose Kong over custom middleware because it provides built-in rate limiting, IP allowlisting, and integrates with our existing Prometheus monitoring. This is critical for our Q3 compliance goals.',
-        user="charlie", channel="#security",
-        expected_capture=True, expected_domain="security",
-        category="decision",
-        note="Tool selection with build-vs-buy rationale",
-    ),
-
-    # --- Casual / Status (REJECT) ---
-    ConversationLine(
-        text="Running the weekly vulnerability scan now. Will share results in the security channel.",
-        user="charlie", channel="#security",
-        expected_capture=False,
-        category="status",
-        note="Routine operational task",
-    ),
-    ConversationLine(
-        text="CVE-2024-12345 was patched in the latest update. We're on the latest version already so no action needed.",
-        user="charlie", channel="#security",
-        expected_capture=False,
-        category="status",
-        note="Informational CVE update, no decision involved",
-    ),
-    ConversationLine(
-        text="Thanks for completing the security training everyone! We had 95% completion rate.",
-        user="charlie", channel="#general",
-        expected_capture=False,
-        category="casual",
-        note="Appreciation message",
-    ),
-
-    # --- Borderline / False Positives (REJECT by Tier 2) ---
-    ConversationLine(
-        text="We decided to skip the security happy hour this Friday because most of the team is remote.",
-        user="charlie", channel="#random",
-        expected_capture=False,
-        category="borderline",
-        note="Contains 'We decided' but is social event, not security policy",
-    ),
-    ConversationLine(
-        text="Maybe we should consider switching to a different password manager. The current one is kind of slow.",
-        user="charlie", channel="#security",
-        expected_capture=False,
-        category="vague",
-        note="Vague suggestion without commitment or analysis",
-    ),
-]
-
-
-ALL_CONVERSATIONS = ALICE_CONVERSATIONS + BOB_CONVERSATIONS + CHARLIE_CONVERSATIONS
-
-
-# ============================================================================
-# Cross-Member Recall Queries
-# ============================================================================
-
-@dataclass
-class RecallQuery:
-    """A recall query from one member about another's context"""
-    searcher: str
-    query: str
-    expected_source_member: str
-    expected_domain: str
-    description: str
-
-
-RECALL_QUERIES: List[RecallQuery] = [
-    # Bob asks about Alice's architecture decisions
-    RecallQuery(
-        searcher="bob",
-        query="Why did we choose PostgreSQL over MongoDB?",
-        expected_source_member="alice",
-        expected_domain="architecture",
-        description="Bob recalls Alice's database decision",
-    ),
-    RecallQuery(
-        searcher="bob",
-        query="What's our approach for inter-service communication?",
-        expected_source_member="alice",
-        expected_domain="architecture",
-        description="Bob recalls Alice's gRPC decision",
-    ),
-    # Alice asks about Charlie's security policies
-    RecallQuery(
-        searcher="alice",
-        query="What are the security requirements for inter-service communication?",
-        expected_source_member="charlie",
-        expected_domain="security",
-        description="Alice recalls Charlie's mTLS requirement",
-    ),
-    RecallQuery(
-        searcher="alice",
-        query="What security considerations apply to data at rest encryption?",
-        expected_source_member="charlie",
-        expected_domain="security",
-        description="Alice recalls Charlie's encryption strategy",
-    ),
-    # Charlie asks about Bob's deployment policy
-    RecallQuery(
-        searcher="charlie",
-        query="What's our process for deploying to production?",
-        expected_source_member="bob",
-        expected_domain="ops",
-        description="Charlie recalls Bob's blue-green deployment policy",
-    ),
-    RecallQuery(
-        searcher="charlie",
-        query="Why did we decide on LaunchDarkly for feature flags?",
-        expected_source_member="bob",
-        expected_domain="product",
-        description="Charlie recalls Bob's LaunchDarkly decision",
-    ),
-]
-
-
-# ============================================================================
-# Tier 2 Mock Helpers
-# ============================================================================
-
-def make_tier2_response(capture: bool, reason: str, domain: str = "general"):
-    """Create a mock Anthropic response for Tier 2"""
-    response = Mock()
-    content_block = Mock()
-    content_block.text = json.dumps({
-        "capture": capture,
-        "reason": reason,
-        "domain": domain,
-    })
-    response.content = [content_block]
-    return response
-
-
-def simulate_tier2_judgment(text: str) -> dict:
-    """
-    Simulate Tier 2 Haiku judgment based on text content.
-    This mimics what a real Haiku call would return.
-    """
-    text_lower = text.lower()
-
-    # Clear casual / social
-    casual_signals = [
-        "good morning", "hope everyone", "heading to the dentist",
-        "standup in", "hey everyone", "thanks for completing",
-        "happy hour", "pizza for the team", "team lunch",
-        "conference room is booked", "offsite to next",
-    ]
-    for signal in casual_signals:
-        if signal in text_lower:
-            return {"capture": False, "reason": "Social/scheduling message, not an organizational decision", "domain": "general"}
-
-    # Status updates
-    status_signals = [
-        "still working on", "looking into it", "updated the readme",
-        "merged the pr", "tests are green", "running the weekly",
-        "was patched", "no action needed", "please update your jira",
-        "should have it by eod", "currently broken",
-    ]
-    for signal in status_signals:
-        if signal in text_lower:
-            return {"capture": False, "reason": "Status update without decision content", "domain": "general"}
-
-    # Vague opinions
-    vague_signals = [
-        "maybe we should", "might be", "sometime", "should look into that someday",
-        "just my personal preference", "kind of slow",
-        "interesting approach",
-    ]
-    for signal in vague_signals:
-        if signal in text_lower:
-            return {"capture": False, "reason": "Vague opinion without commitment or concrete decision", "domain": "general"}
-
-    # Read article / info sharing without decision
-    if "i read an article" in text_lower and "decided" not in text_lower:
-        return {"capture": False, "reason": "Information sharing without decision", "domain": "general"}
-
-    # Real decisions — determine domain
-    domain = "general"
-    if any(w in text_lower for w in ["security", "mtls", "encryption", "authentication", "compliance", "vulnerability", "rate limiting"]):
-        domain = "security"
-    elif any(w in text_lower for w in ["architecture", "postgresql", "grpc", "kafka", "microservice", "typescript", "event-driven"]):
-        domain = "architecture"
-    elif any(w in text_lower for w in ["deployment", "devops", "ci/cd", "blue-green"]):
-        domain = "ops"
-    elif any(w in text_lower for w in ["sprint", "feature flag", "prioritiz", "customer feedback", "mobile app"]):
-        domain = "product"
-    elif any(w in text_lower for w in ["performance", "bottleneck", "latency", "benchmark"]):
-        domain = "architecture"
-    elif any(w in text_lower for w in ["postmortem", "outage", "incident", "circuit breaker"]):
-        domain = "ops"
-
-    return {"capture": True, "reason": "Contains concrete organizational decision with rationale", "domain": domain}
-
-
-# ============================================================================
-# Test Classes
-# ============================================================================
-
-class TestConversationCaptureDecisions:
-    """Test that conversations are correctly classified as capture vs reject"""
-
-    @pytest.fixture
-    def tier2_filter(self):
-        """Create Tier2Filter with simulated judgment"""
-        from agents.scribe.tier2_filter import Tier2Filter, FilterResult
-
-        f = Tier2Filter.__new__(Tier2Filter)
-        f._provider = "anthropic"
-        f._model = "claude-haiku-4-5-20251001"
-
-        mock_llm = Mock()
-        mock_llm.is_available = True
-
-        def side_effect_evaluate(prompt, **kwargs):
-            # Extract the actual text from "<message>\n...\n</message>" format
-            text = prompt.replace("<message>\n", "").split("\n</message>")[0]
-            text = text.split("\n(Tier 1")[0]
-            judgment = simulate_tier2_judgment(text)
-            return json.dumps(judgment)
-
-        mock_llm.generate.side_effect = side_effect_evaluate
-        f._llm = mock_llm
-        return f
-
-    def test_alice_capture_decisions(self, tier2_filter):
-        """Alice's real decisions should be captured"""
-        captures = [c for c in ALICE_CONVERSATIONS if c.expected_capture]
-        assert len(captures) == 5, "Alice should have 5 capturable decisions"
-
-        for conv in captures:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is True, (
-                f"SHOULD CAPTURE but rejected: [{conv.category}] {conv.text[:80]}... "
-                f"Reason: {result.reason}"
-            )
-
-    def test_alice_reject_casual(self, tier2_filter):
-        """Alice's casual/status messages should be rejected"""
-        rejects = [c for c in ALICE_CONVERSATIONS if not c.expected_capture]
-        assert len(rejects) >= 7, "Alice should have >=7 rejectable messages"
-
-        for conv in rejects:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is False, (
-                f"SHOULD REJECT but captured: [{conv.category}] {conv.text[:80]}... "
-                f"Reason: {result.reason}"
-            )
-
-    def test_bob_capture_decisions(self, tier2_filter):
-        """Bob's real decisions should be captured"""
-        captures = [c for c in BOB_CONVERSATIONS if c.expected_capture]
-        assert len(captures) == 5, "Bob should have 5 capturable decisions"
-
-        for conv in captures:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is True, (
-                f"SHOULD CAPTURE but rejected: [{conv.category}] {conv.text[:80]}... "
-                f"Reason: {result.reason}"
-            )
-
-    def test_bob_reject_casual(self, tier2_filter):
-        """Bob's casual/status messages should be rejected"""
-        rejects = [c for c in BOB_CONVERSATIONS if not c.expected_capture]
-        assert len(rejects) >= 6, "Bob should have >=6 rejectable messages"
-
-        for conv in rejects:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is False, (
-                f"SHOULD REJECT but captured: [{conv.category}] {conv.text[:80]}... "
-                f"Reason: {result.reason}"
-            )
-
-    def test_charlie_capture_decisions(self, tier2_filter):
-        """Charlie's real decisions should be captured"""
-        captures = [c for c in CHARLIE_CONVERSATIONS if c.expected_capture]
-        assert len(captures) == 5, "Charlie should have 5 capturable decisions"
-
-        for conv in captures:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is True, (
-                f"SHOULD CAPTURE but rejected: [{conv.category}] {conv.text[:80]}... "
-                f"Reason: {result.reason}"
-            )
-
-    def test_charlie_reject_casual(self, tier2_filter):
-        """Charlie's casual/status messages should be rejected"""
-        rejects = [c for c in CHARLIE_CONVERSATIONS if not c.expected_capture]
-        assert len(rejects) >= 5, "Charlie should have >=5 rejectable messages"
-
-        for conv in rejects:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is False, (
-                f"SHOULD REJECT but captured: [{conv.category}] {conv.text[:80]}... "
-                f"Reason: {result.reason}"
-            )
-
-    def test_all_borderline_rejected(self, tier2_filter):
-        """All borderline messages (false positives) should be rejected by Tier 2"""
-        borderline = [c for c in ALL_CONVERSATIONS if c.category == "borderline"]
-        assert len(borderline) >= 3, "Should have at least 3 borderline messages"
-
-        for conv in borderline:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is False, (
-                f"BORDERLINE should be rejected: {conv.text[:80]}... "
-                f"Note: {conv.note}"
-            )
-
-    def test_all_vague_rejected(self, tier2_filter):
-        """All vague opinions should be rejected by Tier 2"""
-        vague = [c for c in ALL_CONVERSATIONS if c.category == "vague"]
-        assert len(vague) >= 3, "Should have at least 3 vague messages"
-
-        for conv in vague:
-            result = tier2_filter.evaluate(conv.text)
-            assert result.should_capture is False, (
-                f"VAGUE should be rejected: {conv.text[:80]}... "
-                f"Note: {conv.note}"
-            )
-
-    def test_capture_rate(self, tier2_filter):
-        """Overall capture rate should be reasonable (30-50%)"""
-        total = len(ALL_CONVERSATIONS)
-        expected_captures = sum(1 for c in ALL_CONVERSATIONS if c.expected_capture)
-        expected_rate = expected_captures / total
-
-        # Verify our script is balanced
-        assert 0.30 <= expected_rate <= 0.55, (
-            f"Expected capture rate: {expected_rate:.1%} "
-            f"({expected_captures}/{total})"
-        )
-
-
-class TestTier3RecordBuilding:
-    """Test that Tier 3 builds correct Decision Records from captured messages"""
-
-    @pytest.fixture
-    def record_builder(self):
-        """Create RecordBuilder without LLM (regex fallback)"""
-        from agents.scribe.record_builder import RecordBuilder
-        return RecordBuilder()
-
-    def _make_detection(self, conv: ConversationLine):
-        """Create a DetectionResult for a conversation line"""
-        from agents.scribe.detector import DetectionResult
-        return DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="decision pattern",
-            category=conv.expected_domain or "general",
-            domain=conv.expected_domain or "general",
-            priority="high",
-        )
-
-    def _make_raw_event(self, conv: ConversationLine):
-        """Create a RawEvent for a conversation line"""
-        from agents.scribe.record_builder import RawEvent
-        return RawEvent(
-            text=conv.text,
-            user=conv.user,
-            channel=conv.channel,
-            timestamp=str(datetime.now(timezone.utc).timestamp()),
-            source="slack",
-        )
-
-    def test_alice_postgresql_decision_record(self, record_builder):
-        """Alice's PostgreSQL decision should produce a proper record"""
-        conv = ALICE_CONVERSATIONS[0]  # PostgreSQL decision
-        raw_event = self._make_raw_event(conv)
-        detection = self._make_detection(conv)
-
-        record = record_builder.build(raw_event, detection)
-
-        # Should have a meaningful title
-        assert len(record.title) > 5
-        # Should have evidence (the quote about ACID compliance)
-        assert len(record.evidence) > 0
-        # At least one evidence should have a real quote
-        has_quote = any("ACID" in e.quote for e in record.evidence)
-        assert has_quote, "Should extract the ACID compliance quote"
-        # Should have rationale
-        assert record.why.rationale_summary or record.why.certainty.value != "unknown"
-        # payload.text should be non-empty
-        assert len(record.payload.text) > 100
-        # payload.text should contain key info
-        assert "PostgreSQL" in record.payload.text or "postgresql" in record.payload.text.lower()
-
-    def test_bob_deployment_policy_record(self, record_builder):
-        """Bob's deployment policy should produce a proper record"""
-        conv = BOB_CONVERSATIONS[2]  # Blue-green deployment
-        raw_event = self._make_raw_event(conv)
-        detection = self._make_detection(conv)
-
-        record = record_builder.build(raw_event, detection)
-
-        assert len(record.title) > 5
-        assert len(record.evidence) > 0
-        assert len(record.payload.text) > 100
-        # Should be associated with the right user
-        assert "bob" in str(record.decision.who)
-
-    def test_charlie_encryption_record(self, record_builder):
-        """Charlie's encryption decision should produce a proper record"""
-        conv = CHARLIE_CONVERSATIONS[2]  # AES-256-GCM
-        raw_event = self._make_raw_event(conv)
-        detection = self._make_detection(conv)
-
-        record = record_builder.build(raw_event, detection)
-
-        assert len(record.title) > 5
-        assert len(record.payload.text) > 100
-        # Should contain encryption-related content
-        payload_lower = record.payload.text.lower()
-        assert "aes" in payload_lower or "encryption" in payload_lower
-
-    def test_all_capture_messages_produce_valid_records(self, record_builder):
-        """All messages expected to be captured should produce valid records"""
-        captures = [c for c in ALL_CONVERSATIONS if c.expected_capture]
-
-        for conv in captures:
-            raw_event = self._make_raw_event(conv)
-            detection = self._make_detection(conv)
-            record = record_builder.build(raw_event, detection)
-
-            # Every record must have:
-            assert record.id, f"Missing ID for: {conv.text[:50]}"
-            assert record.title, f"Missing title for: {conv.text[:50]}"
-            assert record.payload.text, f"Missing payload.text for: {conv.text[:50]}"
-            assert len(record.payload.text) > 50, f"payload.text too short for: {conv.text[:50]}"
-            assert record.evidence, f"Missing evidence for: {conv.text[:50]}"
-            assert record.domain, f"Missing domain for: {conv.text[:50]}"
-            assert record.why.certainty, f"Missing certainty for: {conv.text[:50]}"
-
-    def test_evidence_certainty_consistency(self, record_builder):
-        """Certainty should be consistent with evidence quality"""
-        # Message with direct quote → should be supported or partially_supported
-        conv = ALICE_CONVERSATIONS[0]  # Has explicit quote
-        raw_event = self._make_raw_event(conv)
-        detection = self._make_detection(conv)
-        record = record_builder.build(raw_event, detection)
-
-        assert record.why.certainty.value in ("supported", "partially_supported"), (
-            f"Quote-bearing message should be supported, got: {record.why.certainty.value}"
-        )
-
-    def test_payload_text_is_embeddable(self, record_builder):
-        """payload.text should be formatted for embedding (not JSON)"""
-        conv = ALICE_CONVERSATIONS[0]
-        raw_event = self._make_raw_event(conv)
-        detection = self._make_detection(conv)
-        record = record_builder.build(raw_event, detection)
-
-        payload = record.payload.text
-        # Should be readable text, not JSON
-        assert not payload.strip().startswith("{"), "payload.text should not be JSON"
-        # Should contain markdown headers
-        assert "# " in payload or "## " in payload, "payload.text should have markdown headers"
-
-
-class TestFullPipelineFlow:
-    """End-to-end pipeline flow tests with all 3 tiers"""
-
-    @pytest.fixture
-    def pipeline_components(self):
-        """Set up all pipeline components with mocks"""
-        from agents.scribe.tier2_filter import Tier2Filter
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.scribe.detector import DecisionDetector, DetectionResult
-        from agents.common.pattern_cache import PatternCache, PatternEntry
-
-        # Mock pattern cache for Tier 1
-        pattern_cache = Mock(spec=PatternCache)
-        pattern_cache.pattern_count = 10
-
-        # Tier 1: always pass for this test (we test Tier 2 filtering)
-        def mock_best_match(text, threshold=0.0):
-            if len(text.strip()) < 20:
-                return (None, 0.1)
-            entry = PatternEntry(
-                text="We decided to use",
-                category="architecture",
-                priority="high",
-                embedding=[0.1] * 384,
-                domain="architecture",
-            )
-            return (entry, 0.75)  # Above threshold but below auto-capture
-
-        pattern_cache.find_best_match.side_effect = mock_best_match
-
-        detector = DecisionDetector(
-            pattern_cache=pattern_cache,
-            threshold=0.5,
-            high_confidence_threshold=0.8,
-        )
-
-        # Tier 2: simulated judgment
-        tier2 = Tier2Filter.__new__(Tier2Filter)
-        tier2._provider = "anthropic"
-        tier2._model = "test"
-
-        mock_llm = Mock()
-        mock_llm.is_available = True
-
-        def tier2_side_effect(prompt, **kwargs):
-            # Extract the actual text from "<message>\n...\n</message>" format
-            text = prompt.replace("<message>\n", "").split("\n</message>")[0]
-            text = text.split("\n(Tier 1")[0]
-            judgment = simulate_tier2_judgment(text)
-            return json.dumps(judgment)
-
-        mock_llm.generate.side_effect = tier2_side_effect
-        tier2._llm = mock_llm
-
-        # Tier 3: RecordBuilder (regex fallback, no LLM)
-        builder = RecordBuilder()
-
-        return {
-            "detector": detector,
-            "tier2": tier2,
-            "builder": builder,
-        }
-
-    def _run_pipeline(self, components, conv: ConversationLine):
-        """Run a message through the full 3-tier pipeline"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.language import detect_language
-
-        detector = components["detector"]
-        tier2 = components["tier2"]
-        builder = components["builder"]
-
-        # Tier 1
-        result = detector.detect(conv.text)
-        if not result.is_significant:
-            return {"tier1": "reject", "record": None}
-
-        # Tier 2
-        filter_result = tier2.evaluate(
-            text=conv.text,
-            tier1_score=result.confidence,
-            tier1_pattern=result.matched_pattern or "",
-        )
-        if not filter_result.should_capture:
-            return {"tier1": "pass", "tier2": "reject", "reason": filter_result.reason, "record": None}
-
-        # Use Tier 2 domain hint
-        if filter_result.domain != "general" and result.domain in (None, "general"):
-            result.domain = filter_result.domain
-
-        # Tier 3
-        raw_event = RawEvent(
-            text=conv.text,
-            user=conv.user,
-            channel=conv.channel,
-            timestamp=str(datetime.now(timezone.utc).timestamp()),
-            source="slack",
-        )
-        language = detect_language(conv.text)
-        record = builder.build(raw_event, result, language=language)
-
-        # Auto-capture vs review
-        action = "auto_capture" if detector.should_auto_capture(result) else "review_queue"
-
-        return {
-            "tier1": "pass",
-            "tier2": "pass",
-            "tier3": "built",
-            "action": action,
-            "record": record,
-        }
-
-    def test_alice_full_pipeline(self, pipeline_components):
-        """Run all of Alice's messages through the full pipeline"""
-        for conv in ALICE_CONVERSATIONS:
-            result = self._run_pipeline(pipeline_components, conv)
-
-            if conv.expected_capture:
-                assert result.get("record") is not None, (
-                    f"Alice CAPTURE expected but no record: {conv.text[:60]}..."
-                )
-            else:
-                assert result.get("record") is None, (
-                    f"Alice REJECT expected but got record: {conv.text[:60]}..."
-                )
-
-    def test_bob_full_pipeline(self, pipeline_components):
-        """Run all of Bob's messages through the full pipeline"""
-        for conv in BOB_CONVERSATIONS:
-            result = self._run_pipeline(pipeline_components, conv)
-
-            if conv.expected_capture:
-                assert result.get("record") is not None, (
-                    f"Bob CAPTURE expected but no record: {conv.text[:60]}..."
-                )
-            else:
-                assert result.get("record") is None, (
-                    f"Bob REJECT expected but got record: {conv.text[:60]}..."
-                )
-
-    def test_charlie_full_pipeline(self, pipeline_components):
-        """Run all of Charlie's messages through the full pipeline"""
-        for conv in CHARLIE_CONVERSATIONS:
-            result = self._run_pipeline(pipeline_components, conv)
-
-            if conv.expected_capture:
-                assert result.get("record") is not None, (
-                    f"Charlie CAPTURE expected but no record: {conv.text[:60]}..."
-                )
-            else:
-                assert result.get("record") is None, (
-                    f"Charlie REJECT expected but got record: {conv.text[:60]}..."
-                )
-
-    def test_pipeline_statistics(self, pipeline_components):
-        """Verify overall pipeline statistics match expectations"""
-        stats = {
-            "total": 0,
-            "tier1_reject": 0,
-            "tier2_reject": 0,
-            "captured": 0,
-            "review_queue": 0,
-            "auto_capture": 0,
-        }
-
-        for conv in ALL_CONVERSATIONS:
-            stats["total"] += 1
-            result = self._run_pipeline(pipeline_components, conv)
-
-            if result.get("tier1") == "reject":
-                stats["tier1_reject"] += 1
-            elif result.get("tier2") == "reject":
-                stats["tier2_reject"] += 1
-            elif result.get("record"):
-                stats["captured"] += 1
-                if result.get("action") == "auto_capture":
-                    stats["auto_capture"] += 1
-                else:
-                    stats["review_queue"] += 1
-
-        # Tier 1 should pass most messages (wide net at 0.5 threshold)
-        # Tier 2 should reject the false positives
-        expected_captures = sum(1 for c in ALL_CONVERSATIONS if c.expected_capture)
-        assert stats["captured"] == expected_captures, (
-            f"Expected {expected_captures} captures, got {stats['captured']}. "
-            f"Stats: {stats}"
-        )
-
-        # Tier 2 should have rejected some messages that Tier 1 passed
-        assert stats["tier2_reject"] > 0, (
-            f"Tier 2 should reject some Tier 1 false positives. Stats: {stats}"
-        )
-
-    def test_no_short_messages_captured(self, pipeline_components):
-        """Short messages should be rejected at Tier 1"""
-        short_messages = [
-            ConversationLine(text="Hi", user="alice", channel="#general", expected_capture=False),
-            ConversationLine(text="OK", user="bob", channel="#general", expected_capture=False),
-            ConversationLine(text="lgtm", user="charlie", channel="#engineering", expected_capture=False),
-            ConversationLine(text="", user="alice", channel="#general", expected_capture=False),
-        ]
-
-        for conv in short_messages:
-            result = self._run_pipeline(pipeline_components, conv)
-            assert result.get("record") is None, (
-                f"Short message should not be captured: '{conv.text}'"
-            )
-
-
-class TestReviewQueueIntegration:
-    """Test that moderate-confidence captures go to review queue"""
-
-    def test_review_queue_add_and_retrieve(self, tmp_path):
-        """Test adding records to review queue and retrieving them"""
-        from agents.scribe.review_queue import ReviewQueue
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.scribe.detector import DetectionResult
-        from agents.common.language import detect_language
-
-        queue = ReviewQueue(queue_path=tmp_path / "test_review.json")
-        builder = RecordBuilder()
-
-        # Add a few records from different team members
-        test_messages = [
-            ALICE_CONVERSATIONS[0],  # PostgreSQL decision
-            BOB_CONVERSATIONS[0],    # Sprint prioritization
-            CHARLIE_CONVERSATIONS[0], # mTLS mandate
-        ]
-
-        record_ids = []
-        for conv in test_messages:
-            raw_event = RawEvent(
-                text=conv.text,
-                user=conv.user,
-                channel=conv.channel,
-                timestamp=str(datetime.now(timezone.utc).timestamp()),
-                source="slack",
-            )
-            detection = DetectionResult(
-                is_significant=True,
-                confidence=0.75,  # Below auto-capture threshold
-                matched_pattern="test",
-                category=conv.expected_domain,
-                domain=conv.expected_domain,
-                priority="medium",
-            )
-            language = detect_language(conv.text)
-            record = builder.build(raw_event, detection, language=language)
-            rid = queue.add(record, detection.confidence)
-            record_ids.append(rid)
-
-        # Verify queue state
-        pending = queue.get_pending()
-        assert len(pending) == 3, f"Expected 3 pending, got {len(pending)}"
-
-        # Each item should have questions
-        for item in pending:
-            assert len(item.questions) >= 3, "Each item should have at least 3 review questions"
-
-        # Verify stats
-        stats = queue.get_stats()
-        assert stats["pending"] == 3
-        assert stats["reviewed"] == 0
-
-    def test_review_approve_updates_record(self, tmp_path):
-        """Test that approving a review updates the record correctly"""
-        from agents.scribe.review_queue import ReviewQueue, ReviewAnswers, ReviewAnswer
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.scribe.detector import DetectionResult
-        from agents.common.language import detect_language
-
-        queue = ReviewQueue(queue_path=tmp_path / "test_review.json")
-        builder = RecordBuilder()
-
-        conv = ALICE_CONVERSATIONS[0]  # PostgreSQL decision
-        raw_event = RawEvent(
-            text=conv.text, user=conv.user, channel=conv.channel,
-            timestamp=str(datetime.now(timezone.utc).timestamp()), source="slack",
-        )
-        detection = DetectionResult(
-            is_significant=True, confidence=0.75,
-            matched_pattern="test", category="architecture",
-            domain="architecture", priority="medium",
-        )
-        language = detect_language(conv.text)
-        record = builder.build(raw_event, detection, language=language)
-        rid = queue.add(record, detection.confidence)
-
-        # Submit review: approve with corrections
-        answers = ReviewAnswers(
-            q1_worth_saving=ReviewAnswer.CAPTURE,
-            q2_evidence_supported=ReviewAnswer.SUPPORTED,
-            q3_sensitivity=ReviewAnswer.INTERNAL,
-            q4_status=ReviewAnswer.ACCEPTED,
-            reviewer_notes="Confirmed with CTO",
-        )
-        updated = queue.submit_review(rid, answers, reviewer="reviewer1")
-
-        assert updated is not None, "Approved review should return updated record"
-        assert updated.why.certainty.value == "supported"
-        assert updated.sensitivity.value == "internal"
-        assert updated.status.value == "accepted"
-        assert updated.quality.review_state.value == "approved"
-
-    def test_review_reject_discards(self, tmp_path):
-        """Test that rejecting a review discards the record"""
-        from agents.scribe.review_queue import ReviewQueue, ReviewAnswers, ReviewAnswer
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.scribe.detector import DetectionResult
-        from agents.common.language import detect_language
-
-        queue = ReviewQueue(queue_path=tmp_path / "test_review.json")
-        builder = RecordBuilder()
-
-        # Use a borderline message that shouldn't be captured
-        conv = ALICE_CONVERSATIONS[-2]  # Pizza decision
-        raw_event = RawEvent(
-            text=conv.text, user=conv.user, channel=conv.channel,
-            timestamp=str(datetime.now(timezone.utc).timestamp()), source="slack",
-        )
-        detection = DetectionResult(
-            is_significant=True, confidence=0.55,
-            matched_pattern="We decided", category="general",
-            domain="general", priority="medium",
-        )
-        language = detect_language(conv.text)
-        record = builder.build(raw_event, detection, language=language)
-        rid = queue.add(record, detection.confidence)
-
-        # Reviewer rejects
-        answers = ReviewAnswers(
-            q1_worth_saving=ReviewAnswer.IGNORE,
-            q2_evidence_supported=ReviewAnswer.UNKNOWN,
-            q3_sensitivity=ReviewAnswer.PUBLIC,
-        )
-        result = queue.submit_review(rid, answers, reviewer="reviewer1")
-
-        assert result is None, "Rejected review should return None"
-        assert queue.get_item(rid).status == "rejected"
-
-
-class TestCrossTeamRecall:
-    """Test that decisions from one team member can be recalled by another"""
-
-    def test_recall_queries_well_formed(self):
-        """Verify recall query test data is well-formed"""
-        assert len(RECALL_QUERIES) >= 6, "Should have at least 6 cross-member recall queries"
-
-        # All queries should reference different source members
-        source_members = set(q.expected_source_member for q in RECALL_QUERIES)
-        assert len(source_members) >= 3, "Queries should span all 3 team members"
-
-        # All queries should have different searchers
-        searchers = set(q.searcher for q in RECALL_QUERIES)
-        assert len(searchers) >= 3, "All 3 members should search for others' decisions"
-
-    def test_query_processor_parses_recall_queries(self):
-        """QueryProcessor should correctly parse recall queries"""
-        from agents.retriever.query_processor import QueryProcessor, QueryIntent
-
-        qp = QueryProcessor()
-
-        specific_intent_count = 0
-        for rq in RECALL_QUERIES:
-            parsed = qp.parse(rq.query)
-
-            if parsed.intent != QueryIntent.GENERAL:
-                specific_intent_count += 1
-
-            assert len(parsed.keywords) > 0, (
-                f"Query should have keywords: {rq.query}"
-            )
-            assert len(parsed.expanded_queries) >= 1, (
-                f"Query should have expansions: {rq.query}"
-            )
-
-        # At least half of recall queries should get a specific intent
-        assert specific_intent_count >= len(RECALL_QUERIES) // 2, (
-            f"At least half of recall queries should have specific intent, "
-            f"got {specific_intent_count}/{len(RECALL_QUERIES)}"
-        )
-
-
-
-class TestConversationScriptCompleteness:
-    """Meta-tests to verify the conversation scripts are comprehensive"""
-
-    def test_all_members_have_conversations(self):
-        """Each member should have a good mix of conversations"""
-        for name, convs in [("alice", ALICE_CONVERSATIONS), ("bob", BOB_CONVERSATIONS), ("charlie", CHARLIE_CONVERSATIONS)]:
-            captures = [c for c in convs if c.expected_capture]
-            rejects = [c for c in convs if not c.expected_capture]
-            assert len(captures) >= 5, f"{name} should have >=5 capture messages"
-            assert len(rejects) >= 5, f"{name} should have >=5 reject messages"
-
-    def test_categories_covered(self):
-        """All conversation categories should be represented"""
-        categories = set(c.category for c in ALL_CONVERSATIONS)
-        assert "decision" in categories
-        assert "casual" in categories
-        assert "status" in categories
-        assert "vague" in categories
-        assert "borderline" in categories
-
-    def test_domains_covered(self):
-        """Multiple domains should be represented"""
-        domains = set(c.expected_domain for c in ALL_CONVERSATIONS if c.expected_domain)
-        assert "architecture" in domains
-        assert "security" in domains
-        assert "product" in domains or "ops" in domains
-
-    def test_total_conversation_count(self):
-        """Should have a substantial number of test conversations"""
-        assert len(ALL_CONVERSATIONS) >= 30, (
-            f"Should have >=30 total conversations, got {len(ALL_CONVERSATIONS)}"
-        )
-
-    def test_recall_queries_span_all_pairs(self):
-        """Recall queries should test all member-to-member pairs"""
-        pairs = set()
-        for rq in RECALL_QUERIES:
-            pairs.add((rq.searcher, rq.expected_source_member))
-
-        # At least one query per searcher-target pair
-        assert ("bob", "alice") in pairs, "Bob should query Alice's decisions"
-        assert ("alice", "charlie") in pairs, "Alice should query Charlie's decisions"
-        assert ("charlie", "bob") in pairs, "Charlie should query Bob's decisions"
diff --git a/agents/tests/test_record_builder.py b/agents/tests/test_record_builder.py
deleted file mode 100644
index 3704d01..0000000
--- a/agents/tests/test_record_builder.py
+++ /dev/null
@@ -1,748 +0,0 @@
-"""
-Tests for Record Builder
-
-Tests Decision Record creation with evidence validation.
-"""
-
-import pytest
-from datetime import datetime
-
-
-class TestRecordBuilder:
-    """Tests for RecordBuilder"""
-
-    @pytest.fixture
-    def builder(self):
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.common.schemas import Sensitivity
-
-        return RecordBuilder(default_sensitivity=Sensitivity.INTERNAL)
-
-    @pytest.fixture
-    def sample_raw_event(self):
-        from agents.scribe.record_builder import RawEvent
-
-        return RawEvent(
-            text='We decided to use PostgreSQL over MySQL because "better JSON support and team familiarity"',
-            user="U12345",
-            channel="architecture",
-            timestamp="1706799600.123456",
-            source="slack",
-            thread_ts=None,
-            url="https://slack.com/archives/C123/p1706799600123456",
-        )
-
-    @pytest.fixture
-    def sample_detection(self):
-        from agents.scribe.detector import DetectionResult
-
-        return DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="We decided to use",
-            category="architecture",
-            domain="architecture",
-            priority="high",
-        )
-
-    def test_build_basic_record(self, builder, sample_raw_event, sample_detection):
-        """Test building a basic Decision Record"""
-        record = builder.build(sample_raw_event, sample_detection)
-
-        assert record is not None
-        assert record.schema_version == "2.1"
-        assert record.type == "decision_record"
-        assert "PostgreSQL" in record.title or "architecture" in record.title.lower()
-
-    def test_record_has_payload_text(self, builder, sample_raw_event, sample_detection):
-        """Test that record has generated payload.text"""
-        record = builder.build(sample_raw_event, sample_detection)
-
-        assert record.payload.text != ""
-        assert record.payload.format == "markdown"
-        assert "Decision Record" in record.payload.text
-
-    def test_evidence_extraction_with_quotes(self, builder, sample_detection):
-        """Test evidence extraction when text has quotes"""
-        from agents.scribe.record_builder import RawEvent
-
-        event = RawEvent(
-            text='We chose Redis because "it is fast and team knows it well"',
-            user="U12345",
-            channel="engineering",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder.build(event, sample_detection)
-
-        assert len(record.evidence) > 0
-        # Should have extracted the quote
-        quotes_found = any("fast" in e.quote.lower() for e in record.evidence)
-        assert quotes_found or len(record.evidence) > 0
-
-    def test_certainty_unknown_without_evidence(self, builder, sample_detection):
-        """Test that certainty is 'unknown' when no proper evidence"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.schemas import Certainty
-
-        event = RawEvent(
-            text="We should use X",  # No quote, no clear evidence
-            user="U12345",
-            channel="general",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder.build(event, sample_detection)
-
-        # Without clear quotes, certainty should not be "supported"
-        # It should be either "unknown" or "partially_supported"
-        assert record.why.certainty in (Certainty.UNKNOWN, Certainty.PARTIALLY_SUPPORTED)
-
-    def test_status_proposed_without_evidence(self, builder, sample_detection):
-        """Test that status is 'proposed' when evidence is weak"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.schemas import Status
-
-        event = RawEvent(
-            text="Maybe we should consider X",  # No definitive decision
-            user="U12345",
-            channel="general",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder.build(event, sample_detection)
-
-        # Should be proposed, not accepted
-        assert record.status == Status.PROPOSED
-
-    def test_sensitive_data_redaction(self, builder, sample_detection):
-        """Test that sensitive data is redacted"""
-        from agents.scribe.record_builder import RawEvent
-
-        event = RawEvent(
-            text="We decided to use API key api_secret_abc123xyz7890123456 for auth and email test@example.com",
-            user="U12345",
-            channel="security",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder.build(event, sample_detection)
-
-        # API key and email should be redacted
-        assert "api_secret_abc123xyz7890123456" not in record.decision.what
-        assert "test@example.com" not in record.decision.what
-        assert "[API_KEY]" in record.decision.what or "[EMAIL]" in record.decision.what
-
-    def test_domain_extraction(self, builder, sample_raw_event, sample_detection):
-        """Test domain is extracted from detection result"""
-        from agents.common.schemas import Domain
-
-        record = builder.build(sample_raw_event, sample_detection)
-
-        assert record.domain == Domain.ARCHITECTURE
-
-    def test_tags_extraction(self, builder, sample_detection):
-        """Test tags are extracted from content"""
-        from agents.scribe.record_builder import RawEvent
-
-        event = RawEvent(
-            text="We decided to use #microservices architecture for scalability",
-            user="U12345",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder.build(event, sample_detection)
-
-        # Should have extracted tags
-        assert len(record.tags) > 0
-        # Should include the hashtag
-        assert "microservices" in record.tags or any("micro" in t for t in record.tags)
-
-    def test_evidence_certainty_consistency(self, builder, sample_detection):
-        """Test that ensure_evidence_certainty_consistency works"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.schemas import Certainty
-
-        event = RawEvent(
-            text="We chose X",  # Minimal text, weak evidence
-            user="U12345",
-            channel="general",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder.build(event, sample_detection)
-
-        # Consistency should be maintained
-        is_valid = record.validate_evidence_certainty()
-        if not is_valid:
-            record.ensure_evidence_certainty_consistency()
-
-        # After consistency check, should be valid
-        assert record.validate_evidence_certainty()
-
-
-class TestRecordBuilderMultilingual:
-    """Tests for multilingual (LLM extraction) path in RecordBuilder"""
-
-    @pytest.fixture
-    def mock_llm_extractor(self):
-        from unittest.mock import Mock
-        from agents.scribe.llm_extractor import LLMExtractor, ExtractedFields
-
-        extractor = Mock(spec=LLMExtractor)
-        extractor.is_available = True
-        fields = ExtractedFields(
-            title="Adopt PostgreSQL for database",
-            rationale="Better JSON support and team familiarity with PostgreSQL",
-            problem="Need a reliable database for financial transactions",
-            alternatives=["MySQL", "MongoDB"],
-            trade_offs=["More complex setup required"],
-            status_hint="accepted",
-            tags=["database", "postgresql"],
-        )
-        extractor.extract.return_value = fields
-        extractor.extract_single.return_value = fields
-        return extractor
-
-    @pytest.fixture
-    def builder_with_llm(self, mock_llm_extractor):
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.common.schemas import Sensitivity
-
-        return RecordBuilder(
-            default_sensitivity=Sensitivity.INTERNAL,
-            llm_extractor=mock_llm_extractor,
-        )
-
-    @pytest.fixture
-    def korean_raw_event(self):
-        from agents.scribe.record_builder import RawEvent
-
-        return RawEvent(
-            text='PostgreSQL을 사용하기로 결정했습니다. "JSON 지원이 좋고 팀이 익숙하기 때문" 이라고 합의했습니다.',
-            user="U99999",
-            channel="architecture",
-            timestamp="1706799600.123456",
-            source="slack",
-            url="https://slack.com/archives/C123/p1706799600123456",
-        )
-
-    @pytest.fixture
-    def sample_detection(self):
-        from agents.scribe.detector import DetectionResult
-
-        return DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="X를 사용하기로 결정했다",
-            category="architecture",
-            domain="architecture",
-            priority="high",
-        )
-
-    def test_llm_path_used_for_korean(self, builder_with_llm, korean_raw_event, sample_detection, mock_llm_extractor):
-        """Test that LLM extractor is called for Korean text"""
-        from agents.common.language import LanguageInfo
-
-        language = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-        record = builder_with_llm.build(korean_raw_event, sample_detection, language=language)
-
-        mock_llm_extractor.extract_single.assert_called_once()
-        assert record is not None
-        assert record.title == "Adopt PostgreSQL for database"
-
-    def test_llm_extracted_fields_used(self, builder_with_llm, korean_raw_event, sample_detection):
-        """Test that LLM-extracted fields are used in the record"""
-        from agents.common.language import LanguageInfo
-        from agents.common.schemas import Status
-
-        language = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-        record = builder_with_llm.build(korean_raw_event, sample_detection, language=language)
-
-        assert record.why.rationale_summary == "Better JSON support and team familiarity with PostgreSQL"
-        assert record.context.problem == "Need a reliable database for financial transactions"
-        assert "MySQL" in record.context.alternatives
-        assert "MongoDB" in record.context.alternatives
-        assert record.status == Status.ACCEPTED
-        assert "database" in record.tags
-
-    def test_llm_used_for_english_when_available(self, builder_with_llm, sample_detection, mock_llm_extractor):
-        """Test that LLM is used for English when available (robust to typos)"""
-        from agents.scribe.record_builder import RawEvent
-
-        event = RawEvent(
-            text='We decided to use PostgreSQL because "better JSON support"',
-            user="U12345",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        record = builder_with_llm.build(event, sample_detection)
-
-        # LLM extractor SHOULD be called for English too (preferred for all languages)
-        mock_llm_extractor.extract_single.assert_called_once()
-        assert record is not None
-        assert record.title == "Adopt PostgreSQL for database"
-
-    def test_llm_used_for_all_languages(self, builder_with_llm, sample_detection, mock_llm_extractor):
-        """Test that LLM is used for all languages when available (language-agnostic)"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.language import LanguageInfo
-
-        event = RawEvent(
-            text='We decided to use PostgreSQL because "better JSON support"',
-            user="U12345",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        language = LanguageInfo(code="en", confidence=0.99, script="Latin")
-        record = builder_with_llm.build(event, sample_detection, language=language)
-
-        # LLM extractor SHOULD be called regardless of language
-        mock_llm_extractor.extract_single.assert_called_once()
-        assert record is not None
-        assert record.title == "Adopt PostgreSQL for database"
-
-    def test_fallback_when_llm_unavailable(self, sample_detection):
-        """Test regex fallback when LLM extractor is not available"""
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.common.schemas import Sensitivity
-        from agents.common.language import LanguageInfo
-        from unittest.mock import Mock
-        from agents.scribe.llm_extractor import LLMExtractor
-
-        extractor = Mock(spec=LLMExtractor)
-        extractor.is_available = False
-
-        builder = RecordBuilder(
-            default_sensitivity=Sensitivity.INTERNAL,
-            llm_extractor=extractor,
-        )
-
-        event = RawEvent(
-            text='PostgreSQL을 사용하기로 결정했습니다',
-            user="U99999",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        language = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-        record = builder.build(event, sample_detection, language=language)
-
-        # Should use regex fallback, not call LLM
-        extractor.extract.assert_not_called()
-        assert record is not None
-
-    def test_status_from_hint_accepted(self, builder_with_llm, korean_raw_event, sample_detection):
-        """Test status_hint='accepted' maps to ACCEPTED"""
-        from agents.common.language import LanguageInfo
-        from agents.common.schemas import Status
-
-        language = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-        record = builder_with_llm.build(korean_raw_event, sample_detection, language=language)
-
-        assert record.status == Status.ACCEPTED
-
-    def test_status_from_hint_proposed(self, sample_detection):
-        """Test status_hint='proposed' maps to PROPOSED"""
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.common.schemas import Sensitivity, Status
-        from agents.common.language import LanguageInfo
-        from agents.scribe.llm_extractor import ExtractedFields
-        from unittest.mock import Mock
-
-        extractor = Mock()
-        extractor.is_available = True
-        fields = ExtractedFields(
-            title="Consider using Redis",
-            status_hint="proposed",
-        )
-        extractor.extract.return_value = fields
-        extractor.extract_single.return_value = fields
-
-        builder = RecordBuilder(
-            default_sensitivity=Sensitivity.INTERNAL,
-            llm_extractor=extractor,
-        )
-
-        event = RawEvent(
-            text='Redis를 고려해봐야 할 것 같습니다',
-            user="U99999",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        language = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-        record = builder.build(event, sample_detection, language=language)
-
-        assert record.status == Status.PROPOSED
-
-    def test_record_has_payload_text_for_korean(self, builder_with_llm, korean_raw_event, sample_detection):
-        """Test that payload.text is generated for Korean input"""
-        from agents.common.language import LanguageInfo
-
-        language = LanguageInfo(code="ko", confidence=0.95, script="Hangul")
-        record = builder_with_llm.build(korean_raw_event, sample_detection, language=language)
-
-        assert record.payload.text != ""
-        assert record.payload.format == "markdown"
-        assert "Decision Record" in record.payload.text
-
-    def test_llm_handles_typos_and_informal_english(self, builder_with_llm, sample_detection, mock_llm_extractor):
-        """Test that LLM extraction handles typos and informal language in English"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.language import LanguageInfo
-
-        # Informal English with typos and abbreviations
-        event = RawEvent(
-            text='we decidd 2 use postgres bcuz "its got gr8 json support & team knows it"',
-            user="U12345",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-        language = LanguageInfo(code="en", confidence=0.95, script="Latin")
-        record = builder_with_llm.build(event, sample_detection, language=language)
-
-        # LLM should handle typos/informal language gracefully
-        mock_llm_extractor.extract_single.assert_called_once()
-        assert record is not None
-        assert record.title == "Adopt PostgreSQL for database"
-
-
-class TestPayloadTextGeneration:
-    """Tests for payload.text generation"""
-
-    @pytest.fixture
-    def sample_record(self):
-        from agents.common.schemas import (
-            DecisionRecord, DecisionDetail, Context, Why, Evidence,
-            SourceRef, Quality, Payload, Domain, Sensitivity, Status,
-            Certainty, ReviewState, SourceType
-        )
-
-        return DecisionRecord(
-            id="dec_2024-02-01_arch_postgres",
-            domain=Domain.ARCHITECTURE,
-            sensitivity=Sensitivity.INTERNAL,
-            status=Status.ACCEPTED,
-            title="Adopt PostgreSQL for database",
-            decision=DecisionDetail(
-                what="We will use PostgreSQL as our primary database",
-                who=["role:tech_lead", "user:alice"],
-                where="slack:#architecture",
-                when="2024-02-01",
-            ),
-            context=Context(
-                problem="Need a reliable database with JSON support",
-                alternatives=["MySQL", "PostgreSQL", "MongoDB"],
-                chosen="PostgreSQL",
-                trade_offs=["More complex setup", "Better long-term flexibility"],
-            ),
-            why=Why(
-                rationale_summary="Better JSON support and team familiarity",
-                certainty=Certainty.SUPPORTED,
-                missing_info=[],
-            ),
-            evidence=[
-                Evidence(
-                    claim="Team prefers PostgreSQL",
-                    quote="We all know Postgres well",
-                    source=SourceRef(
-                        type=SourceType.SLACK,
-                        url="https://slack.com/...",
-                        pointer="channel:#arch",
-                    ),
-                ),
-            ],
-            tags=["database", "postgres", "architecture"],
-            quality=Quality(scribe_confidence=0.9),
-            payload=Payload(format="markdown", text=""),
-        )
-
-    def test_render_payload_text(self, sample_record):
-        from agents.common.schemas.templates import render_payload_text
-
-        payload_text = render_payload_text(sample_record)
-
-        assert "Decision Record: Adopt PostgreSQL" in payload_text
-        assert "dec_2024-02-01_arch_postgres" in payload_text
-        assert "PostgreSQL" in payload_text
-        assert "JSON support" in payload_text
-        assert "Evidence" in payload_text
-        assert "We all know Postgres well" in payload_text
-
-    def test_payload_text_contains_certainty(self, sample_record):
-        from agents.common.schemas.templates import render_payload_text
-
-        payload_text = render_payload_text(sample_record)
-
-        assert "Certainty: supported" in payload_text
-
-    def test_payload_text_contains_alternatives(self, sample_record):
-        from agents.common.schemas.templates import render_payload_text
-
-        payload_text = render_payload_text(sample_record)
-
-        assert "MySQL" in payload_text
-        assert "MongoDB" in payload_text
-        assert "(chosen)" in payload_text
-
-
-class TestBuildPhases:
-    """Tests for RecordBuilder.build_phases()"""
-
-    @pytest.fixture
-    def sample_detection(self):
-        from agents.scribe.detector import DetectionResult
-
-        return DetectionResult(
-            is_significant=True,
-            confidence=0.85,
-            matched_pattern="We decided",
-            category="architecture",
-            domain="architecture",
-            priority="high",
-        )
-
-    @pytest.fixture
-    def sample_raw_event(self):
-        from agents.scribe.record_builder import RawEvent
-
-        return RawEvent(
-            text='We decided to use PostgreSQL because "better JSON support"',
-            user="U12345",
-            channel="architecture",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-    def test_single_extraction_returns_one_record(self, sample_raw_event, sample_detection):
-        """build_phases returns 1-element list for single extraction"""
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.scribe.llm_extractor import ExtractedFields, ExtractionResult
-        from agents.common.schemas import Sensitivity
-        from unittest.mock import Mock
-
-        extractor = Mock()
-        extractor.is_available = True
-        extractor.extract.return_value = ExtractionResult(
-            group_title="",
-            group_type="",
-            status_hint="accepted",
-            tags=["database"],
-            single=ExtractedFields(
-                title="Adopt PostgreSQL",
-                rationale="Better JSON support",
-                problem="Need a database",
-                status_hint="accepted",
-                tags=["database"],
-            ),
-            phases=None,
-        )
-
-        builder = RecordBuilder(
-            default_sensitivity=Sensitivity.INTERNAL,
-            llm_extractor=extractor,
-        )
-
-        records = builder.build_phases(sample_raw_event, sample_detection)
-
-        assert len(records) == 1
-        assert records[0].title == "Adopt PostgreSQL"
-        assert records[0].group_id is None
-
-    def test_multi_phase_returns_linked_records(self, sample_raw_event, sample_detection):
-        """build_phases returns multiple records with consistent group fields for phase chain"""
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.scribe.llm_extractor import PhaseExtractedFields, ExtractionResult
-        from agents.common.schemas import Sensitivity
-        from unittest.mock import Mock
-
-        phases = [
-            PhaseExtractedFields(
-                phase_title="Market Analysis",
-                phase_decision="Target mid-market SaaS companies",
-                phase_rationale="Largest underserved segment",
-                phase_problem="Need to identify target market",
-                tags=["market"],
-            ),
-            PhaseExtractedFields(
-                phase_title="Pricing Strategy",
-                phase_decision="Freemium with usage-based tiers",
-                phase_rationale="Lowers adoption barrier",
-                phase_problem="Need a pricing model",
-                tags=["pricing"],
-            ),
-            PhaseExtractedFields(
-                phase_title="Roadmap",
-                phase_decision="Ship MVP in Q1, enterprise in Q2",
-                phase_rationale="Fast iteration cycle",
-                phase_problem="Need delivery timeline",
-                tags=["roadmap"],
-            ),
-        ]
-
-        extractor = Mock()
-        extractor.is_available = True
-        extractor.extract.return_value = ExtractionResult(
-            group_title="PLG Strategy",
-            group_type="phase_chain",
-            status_hint="accepted",
-            tags=["strategy"],
-            single=None,
-            phases=phases,
-        )
-
-        builder = RecordBuilder(
-            default_sensitivity=Sensitivity.INTERNAL,
-            llm_extractor=extractor,
-        )
-
-        records = builder.build_phases(sample_raw_event, sample_detection)
-
-        assert len(records) == 3
-
-        # Verify the builder linked them under one group
-        group_ids = {r.group_id for r in records}
-        assert len(group_ids) == 1
-        assert None not in group_ids
-
-    def test_no_llm_falls_back_to_single(self, sample_raw_event, sample_detection):
-        """build_phases returns single record when LLM unavailable"""
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.common.schemas import Sensitivity
-
-        builder = RecordBuilder(default_sensitivity=Sensitivity.INTERNAL)
-
-        records = builder.build_phases(sample_raw_event, sample_detection)
-
-        assert len(records) == 1
-        assert records[0].group_id is None
-
-
-class TestRecordBuilderDecomposed:
-    """
-    Granular unit tests
-    """
-
-    @pytest.fixture
-    def builder(self):
-        from agents.scribe.record_builder import RecordBuilder
-        from agents.common.schemas import Sensitivity
-        return RecordBuilder(default_sensitivity=Sensitivity.INTERNAL)
-
-    @pytest.fixture
-    def sample_raw_event(self):
-        from agents.scribe.record_builder import RawEvent
-        return RawEvent(
-            text="Test text",
-            user="user1",
-            channel="chan1",
-            timestamp="1706799600",
-            source="slack",
-        )
-
-    @pytest.fixture
-    def sample_detection(self):
-        from agents.scribe.detector import DetectionResult
-        return DetectionResult(
-            is_significant=True,
-            confidence=0.9,
-            matched_pattern="pattern",
-            domain="architecture",
-        )
-
-    def test_build_single_record_title_fallback(self, builder, sample_raw_event, sample_detection):
-        """Single record builder falls back to _extract_title if fields.title is missing"""
-        from agents.scribe.llm_extractor import ExtractedFields, ExtractionResult
-
-        fields = ExtractedFields(
-            title="",  # Missing title
-            rationale="Because reasons",
-            problem="Issue X",
-        )
-        extraction = ExtractionResult(single=fields)
-
-        # Access internal method to demonstrate testability
-        record = builder._build_single_record_from_extraction(
-            fields=fields,
-            raw_event=sample_raw_event,
-            clean_text="Decided to use X over Y",
-            detection=sample_detection,
-            extraction=extraction,
-            redaction_notes="Redacted 1 [API_KEY]",
-        )
-
-        # Title should have been extracted from clean_text/detection since fields.title was empty
-        assert record.title != ""
-        assert "Decided to use X" in record.title
-        assert record.quality.review_notes == "Redacted 1 [API_KEY]"
-
-    def test_build_multi_record_invalid_timestamp(self, builder, sample_detection):
-        """Multi record builder handles invalid timestamps in raw_event gracefully"""
-        from agents.scribe.record_builder import RawEvent
-        from agents.scribe.llm_extractor import ExtractionResult, PhaseExtractedFields
-
-        bad_event = RawEvent(
-            text="test", user="u", channel="c",
-            timestamp="invalid-timestamp", # This would cause float() to fail
-            source="slack"
-        )
-        extraction = ExtractionResult(
-            group_title="Title",
-            phases=[PhaseExtractedFields(phase_title="P1", phase_decision="D1")]
-        )
-
-        records = builder._build_multi_record_from_extraction(
-            extraction=extraction,
-            raw_event=bad_event,
-            clean_text="test",
-            detection=sample_detection,
-            redaction_notes=None
-        )
-
-        assert len(records) == 1
-        # timestamp parsing failure should result in keeping the raw string
-        assert records[0].decision.when == "invalid-timestamp"
-
-    def test_build_multi_record_group_metadata(self, builder, sample_raw_event, sample_detection):
-        """Group metadata (summary, types) is correctly mapped in multi-record build"""
-        from agents.scribe.llm_extractor import ExtractionResult, PhaseExtractedFields
-
-        extraction = ExtractionResult(
-            group_title="Global Group",
-            group_type="bundle",
-            group_summary="Overall summary",
-            phases=[
-                PhaseExtractedFields(phase_title="P1", phase_decision="D1"),
-                PhaseExtractedFields(phase_title="P2", phase_decision="D2")
-            ]
-        )
-
-        records = builder._build_multi_record_from_extraction(
-            extraction=extraction,
-            raw_event=sample_raw_event,
-            clean_text="test",
-            detection=sample_detection,
-            redaction_notes=None
-        )
-
-        assert len(records) == 2
-        assert records[0].group_type == "bundle"
-        assert records[0].group_summary == "Overall summary"
-        assert "_b0" in records[0].id
-        assert "_b1" in records[1].id
diff --git a/agents/tests/test_retriever.py b/agents/tests/test_retriever.py
deleted file mode 100644
index 35117c3..0000000
--- a/agents/tests/test_retriever.py
+++ /dev/null
@@ -1,871 +0,0 @@
-"""
-Tests for Retriever Agent
-
-Tests query processing, searching, and synthesis.
-"""
-
-import pytest
-from unittest.mock import Mock, MagicMock
-
-
-class TestQueryProcessor:
-    """Tests for QueryProcessor"""
-
-    @pytest.fixture
-    def processor(self):
-        from agents.retriever.query_processor import QueryProcessor
-        return QueryProcessor()
-
-    def test_parse_decision_rationale_query(self, processor):
-        from agents.retriever.query_processor import QueryIntent
-
-        result = processor.parse("Why did we choose PostgreSQL over MySQL?")
-
-        assert result.intent == QueryIntent.DECISION_RATIONALE
-        assert "PostgreSQL" in result.entities or "postgresql" in result.cleaned
-
-    def test_parse_feature_history_query(self, processor):
-        from agents.retriever.query_processor import QueryIntent
-
-        result = processor.parse("Have customers asked for dark mode?")
-
-        assert result.intent == QueryIntent.FEATURE_HISTORY
-
-    def test_parse_pattern_lookup_query(self, processor):
-        from agents.retriever.query_processor import QueryIntent
-
-        result = processor.parse("How do we handle authentication?")
-
-        assert result.intent == QueryIntent.PATTERN_LOOKUP
-
-    def test_parse_technical_context_query(self, processor):
-        from agents.retriever.query_processor import QueryIntent
-
-        result = processor.parse("What's our architecture for the payment system?")
-
-        assert result.intent == QueryIntent.TECHNICAL_CONTEXT
-
-    def test_parse_general_query(self, processor):
-        from agents.retriever.query_processor import QueryIntent
-
-        result = processor.parse("Tell me about our database")
-
-        assert result.intent == QueryIntent.GENERAL
-
-    def test_time_scope_detection(self, processor):
-        from agents.retriever.query_processor import TimeScope
-
-        result = processor.parse("What decisions did we make last week?")
-        assert result.time_scope == TimeScope.LAST_WEEK
-
-        result = processor.parse("What happened in Q3?")
-        assert result.time_scope == TimeScope.LAST_QUARTER
-
-    def test_entity_extraction_quoted(self, processor):
-        result = processor.parse('Why did we choose "React Native"?')
-
-        assert "React Native" in result.entities
-
-    def test_entity_extraction_capitalized(self, processor):
-        result = processor.parse("Why did we use PostgreSQL instead of MySQL?")
-
-        # Should extract capitalized tech names
-        entities_lower = [e.lower() for e in result.entities]
-        assert "postgresql" in entities_lower or "mysql" in entities_lower
-
-    def test_keyword_extraction(self, processor):
-        result = processor.parse("Why did we choose PostgreSQL for the database?")
-
-        assert "postgresql" in result.keywords or "database" in result.keywords
-
-    def test_query_expansion(self, processor):
-        result = processor.parse("Why PostgreSQL?")
-
-        assert len(result.expanded_queries) > 1
-        # Original should be included
-        assert any("postgresql" in q.lower() for q in result.expanded_queries)
-
-    def test_format_for_search(self, processor):
-        result = processor.parse("Why did we choose PostgreSQL?")
-        formatted = processor.format_for_search(result)
-
-        assert "postgresql" in formatted.lower()
-
-
-class TestSearcher:
-    """Tests for Searcher"""
-
-    @pytest.fixture
-    def mock_client(self):
-        client = Mock()
-        client.score.return_value = {"ok": True, "encrypted_blobs": ["fake_blob"]}
-        client.remind.return_value = {"ok": True, "results": [
-            {"score": 0.8, "metadata": {
-                "id": "dec_2024-01-01_arch_postgres",
-                "title": "Adopt PostgreSQL",
-                "domain": "architecture",
-                "why": {"certainty": "supported"},
-                "payload": {"text": "# Decision Record: Adopt PostgreSQL\n..."},
-            }}
-        ]}
-        return client
-
-    @pytest.fixture
-    def mock_embedding(self):
-        embedding = Mock()
-        embedding.embed_single.return_value = [0.1] * 1024
-        return embedding
-
-    @pytest.fixture
-    def mock_vault(self):
-        from unittest.mock import AsyncMock
-        from dataclasses import dataclass
-
-        @dataclass
-        class VaultResult:
-            ok: bool = True
-            error: str = ""
-            results: list = None
-
-        vault = AsyncMock()
-        vault.decrypt_search_results.return_value = VaultResult(
-            ok=True,
-            results=[{"shard_idx": 0, "row_idx": 0, "score": 0.8}],
-        )
-        vault.decrypt_metadata.return_value = [
-            {"id": "dec_2024-01-01_arch_postgres",
-             "title": "Adopt PostgreSQL",
-             "domain": "architecture",
-             "why": {"certainty": "supported"},
-             "payload": {"text": "# Decision Record: Adopt PostgreSQL\n..."}}
-        ]
-        return vault
-
-    @pytest.fixture
-    def searcher(self, mock_client, mock_embedding, mock_vault):
-        from agents.retriever.searcher import Searcher
-        return Searcher(mock_client, mock_embedding, "test-collection", vault_client=mock_vault)
-
-    @pytest.mark.asyncio
-    async def test_search_returns_results(self, searcher):
-        from agents.retriever.query_processor import QueryProcessor
-
-        processor = QueryProcessor()
-        query = processor.parse("Why PostgreSQL?")
-
-        results = await searcher.search(query)
-
-        assert len(results) > 0
-        assert results[0].record_id == "dec_2024-01-01_arch_postgres"
-
-    @pytest.mark.asyncio
-    async def test_search_result_has_payload_text(self, searcher):
-        from agents.retriever.query_processor import QueryProcessor
-
-        processor = QueryProcessor()
-        query = processor.parse("Why PostgreSQL?")
-
-        results = await searcher.search(query)
-
-        assert results[0].payload_text != ""
-        assert "Decision Record" in results[0].payload_text
-
-    @pytest.mark.asyncio
-    async def test_search_result_has_certainty(self, searcher):
-        from agents.retriever.query_processor import QueryProcessor
-
-        processor = QueryProcessor()
-        query = processor.parse("Why PostgreSQL?")
-
-        results = await searcher.search(query)
-
-        assert results[0].certainty == "supported"
-
-    @pytest.mark.asyncio
-    async def test_is_reliable_check(self, searcher):
-        from agents.retriever.query_processor import QueryProcessor
-
-        processor = QueryProcessor()
-        query = processor.parse("Why PostgreSQL?")
-
-        results = await searcher.search(query)
-
-        # Supported certainty should be reliable
-        assert results[0].is_reliable is True
-
-
-class TestExpandPhaseChains:
-    """Tests for phase chain expansion in Searcher"""
-
-    @pytest.fixture
-    def mock_client(self):
-        client = Mock()
-        client.score.return_value = {"ok": True, "encrypted_blobs": ["fake_blob"]}
-        client.remind.return_value = {"ok": True, "results": []}
-        return client
-
-    @pytest.fixture
-    def mock_embedding(self):
-        embedding = Mock()
-        embedding.embed_single.return_value = [0.1] * 1024
-        return embedding
-
-    @pytest.fixture
-    def mock_vault(self):
-        from unittest.mock import AsyncMock
-        from dataclasses import dataclass
-
-        @dataclass
-        class VaultResult:
-            ok: bool = True
-            error: str = ""
-            results: list = None
-
-        vault = AsyncMock()
-        vault.decrypt_search_results.return_value = VaultResult(ok=True, results=[])
-        return vault
-
-    @pytest.fixture
-    def searcher(self, mock_client, mock_embedding, mock_vault):
-        from agents.retriever.searcher import Searcher
-        return Searcher(mock_client, mock_embedding, "test-collection", vault_client=mock_vault)
-
-    def _make_result(self, record_id, group_id=None, group_type=None, phase_seq=None, phase_total=None, score=0.8):
-        from agents.retriever.searcher import SearchResult
-        return SearchResult(
-            record_id=record_id,
-            title=f"Title {record_id}",
-            payload_text=f"Content of {record_id}",
-            domain="architecture",
-            certainty="supported",
-            status="accepted",
-            score=score,
-            group_id=group_id,
-            group_type=group_type,
-            phase_seq=phase_seq,
-            phase_total=phase_total,
-        )
-
-    @pytest.mark.asyncio
-    async def test_expand_fetches_siblings(self, searcher):
-        """Test that expansion fetches siblings for phase results"""
-        grp = "grp_2026-01-01_arch_plg"
-        results = [
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=3),
-        ]
-
-        # Mock _search_single to return siblings
-        sibling_results = [
-            self._make_result("dec_p1", group_id=grp, group_type="phase_chain", phase_seq=1, phase_total=3),
-            self._make_result("dec_p2", group_id=grp, group_type="phase_chain", phase_seq=2, phase_total=3),
-        ]
-        from unittest.mock import AsyncMock
-        searcher._search_single = AsyncMock(return_value=sibling_results)
-
-        expanded = await searcher._expand_phase_chains(results)
-
-        # Should have fetched siblings
-        searcher._search_single.assert_called_once()
-        # Should contain original + siblings ordered by phase_seq
-        assert len(expanded) == 3  # Original (dec_p0) + 2 fetched siblings
-        assert expanded[0].phase_seq == 0
-        assert expanded[1].phase_seq == 1
-        assert expanded[2].phase_seq == 2
-
-    @pytest.mark.asyncio
-    async def test_expand_orders_by_phase_seq(self, searcher):
-        """Test that expanded results are ordered by phase_seq"""
-        grp = "grp_2026-01-01_arch_test"
-        results = [
-            self._make_result("dec_p1", group_id=grp, group_type="phase_chain", phase_seq=1, phase_total=3),
-        ]
-
-        # Return siblings out of order
-        from unittest.mock import AsyncMock
-        searcher._search_single = AsyncMock(return_value=[
-            self._make_result("dec_p2", group_id=grp, group_type="phase_chain", phase_seq=2, phase_total=3),
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=3),
-        ])
-
-        expanded = await searcher._expand_phase_chains(results)
-
-        # Siblings should be sorted by phase_seq
-        seqs = [r.phase_seq for r in expanded]
-        assert seqs == sorted(seqs)
-
-    @pytest.mark.asyncio
-    async def test_expand_no_duplicate_record_ids(self, searcher):
-        """Test that expanded results have no duplicate record_ids"""
-        grp = "grp_2026-01-01_arch_dedup"
-        results = [
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=2),
-        ]
-
-        # Sibling search returns the original + new one
-        from unittest.mock import AsyncMock
-        searcher._search_single = AsyncMock(return_value=[
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=2),
-            self._make_result("dec_p1", group_id=grp, group_type="phase_chain", phase_seq=1, phase_total=2),
-        ])
-
-        expanded = await searcher._expand_phase_chains(results)
-
-        # No duplicates
-        ids = [r.record_id for r in expanded]
-        assert len(ids) == len(set(ids))
-
-    @pytest.mark.asyncio
-    async def test_expand_standalone_untouched(self, searcher):
-        """Test that standalone (non-phase) results are passed through"""
-        results = [
-            self._make_result("dec_standalone", score=0.9),
-        ]
-
-        from unittest.mock import AsyncMock
-        searcher._search_single = AsyncMock()
-
-        expanded = await searcher._expand_phase_chains(results)
-
-        # No search should be made for non-phase results
-        searcher._search_single.assert_not_called()
-        assert len(expanded) == 1
-        assert expanded[0].record_id == "dec_standalone"
-
-
-class TestSynthesizer:
-    """Tests for Synthesizer"""
-
-    @pytest.fixture
-    def synthesizer_no_llm(self):
-        from agents.retriever.synthesizer import Synthesizer
-        return Synthesizer(anthropic_api_key=None)
-
-    @pytest.fixture
-    def sample_results(self):
-        from agents.retriever.searcher import SearchResult
-
-        return [
-            SearchResult(
-                record_id="dec_2024-01-01_arch_postgres",
-                title="Adopt PostgreSQL",
-                payload_text="# Decision Record: Adopt PostgreSQL\n\nWe chose PostgreSQL for better JSON support.",
-                domain="architecture",
-                certainty="supported",
-                status="accepted",
-                score=0.85,
-            ),
-            SearchResult(
-                record_id="dec_2024-01-02_arch_redis",
-                title="Use Redis for caching",
-                payload_text="# Decision Record: Use Redis\n\nRedis for caching due to performance.",
-                domain="architecture",
-                certainty="partially_supported",
-                status="accepted",
-                score=0.75,
-            ),
-        ]
-
-    @pytest.fixture
-    def sample_query(self):
-        from agents.retriever.query_processor import QueryProcessor
-
-        processor = QueryProcessor()
-        return processor.parse("Why did we choose PostgreSQL?")
-
-    def test_has_llm_property(self, synthesizer_no_llm):
-        assert synthesizer_no_llm.has_llm is False
-
-    def test_synthesize_no_results(self, synthesizer_no_llm, sample_query):
-        result = synthesizer_no_llm.synthesize(sample_query, [])
-
-        assert "No relevant records found" in result.answer
-        assert result.confidence == 0.0
-        assert len(result.sources) == 0
-
-    def test_synthesize_fallback(self, synthesizer_no_llm, sample_query, sample_results):
-        result = synthesizer_no_llm.synthesize(sample_query, sample_results)
-
-        # Should use fallback formatting
-        assert "Search Results" in result.answer or "PostgreSQL" in result.answer
-        assert len(result.sources) > 0
-        assert any("LLM not available" in w for w in result.warnings)
-
-    def test_confidence_calculation(self, synthesizer_no_llm, sample_query, sample_results):
-        result = synthesizer_no_llm.synthesize(sample_query, sample_results)
-
-        # Should have some confidence based on results
-        assert result.confidence > 0.0
-        assert result.confidence <= 1.0
-
-    def test_sources_extraction(self, synthesizer_no_llm, sample_query, sample_results):
-        result = synthesizer_no_llm.synthesize(sample_query, sample_results)
-
-        assert len(result.sources) == 2
-        assert result.sources[0]["record_id"] == "dec_2024-01-01_arch_postgres"
-        assert result.sources[0]["certainty"] == "supported"
-
-    def test_warnings_for_uncertain_evidence(self, synthesizer_no_llm, sample_query):
-        from agents.retriever.searcher import SearchResult
-
-        uncertain_results = [
-            SearchResult(
-                record_id="dec_1",
-                title="Uncertain decision",
-                payload_text="Some decision",
-                domain="general",
-                certainty="unknown",
-                status="proposed",
-                score=0.7,
-            ),
-        ]
-
-        result = synthesizer_no_llm.synthesize(sample_query, uncertain_results)
-
-        # Should have warning about uncertain/unknown evidence or LLM not available
-        has_warning = any("uncertain" in w.lower() or "unknown" in w.lower() or "LLM" in w for w in result.warnings)
-        assert has_warning
-
-    def test_related_queries_suggestion(self, synthesizer_no_llm, sample_query, sample_results):
-        result = synthesizer_no_llm.synthesize(sample_query, sample_results)
-
-        assert len(result.related_queries) > 0
-
-
-class TestSynthesizerGrouping:
-    """Tests for phase chain / bundle grouping in Synthesizer._format_records_for_prompt"""
-
-    @pytest.fixture
-    def synthesizer_no_llm(self):
-        from agents.retriever.synthesizer import Synthesizer
-        return Synthesizer(anthropic_api_key=None)
-
-    def _make_result(self, record_id, group_id=None, group_type=None, phase_seq=None, phase_total=None, title="Test", score=0.8):
-        from agents.retriever.searcher import SearchResult
-        return SearchResult(
-            record_id=record_id,
-            title=title,
-            payload_text=f"Content of {record_id}",
-            domain="architecture",
-            certainty="supported",
-            status="accepted",
-            score=score,
-            group_id=group_id,
-            group_type=group_type,
-            phase_seq=phase_seq,
-            phase_total=phase_total,
-        )
-
-    def test_phase_chain_grouped_as_single_block(self, synthesizer_no_llm):
-        """Test that phase chain results render as one 'Phase Chain' block"""
-        grp = "grp_2026-01-01_arch_strategy"
-        results = [
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=3, title="Market Analysis"),
-            self._make_result("dec_p1", group_id=grp, group_type="phase_chain", phase_seq=1, phase_total=3, title="Pricing Model"),
-            self._make_result("dec_p2", group_id=grp, group_type="phase_chain", phase_seq=2, phase_total=3, title="Roadmap"),
-        ]
-
-        formatted = synthesizer_no_llm._format_records_for_prompt(results)
-
-        assert "Phase Chain" in formatted
-        assert "Phase 1/3" in formatted
-        assert "Phase 2/3" in formatted
-        assert "Phase 3/3" in formatted
-        # Should be a single record block, not three separate ones
-        assert formatted.count("Record ") == 1
-
-    def test_bundle_grouped_as_single_block(self, synthesizer_no_llm):
-        """Test that bundle results render as one 'Decision Bundle' block"""
-        grp = "grp_2026-01-01_product_auth"
-        results = [
-            self._make_result("dec_b0", group_id=grp, group_type="bundle", phase_seq=0, phase_total=2, title="Auth Method"),
-            self._make_result("dec_b1", group_id=grp, group_type="bundle", phase_seq=1, phase_total=2, title="Token Storage"),
-        ]
-
-        formatted = synthesizer_no_llm._format_records_for_prompt(results)
-
-        assert "Decision Bundle" in formatted
-        assert "Facet 1" in formatted
-        assert "Facet 2" in formatted
-        assert formatted.count("Record ") == 1
-
-    def test_standalone_formatted_individually(self, synthesizer_no_llm):
-        """Test that standalone records are formatted individually"""
-        results = [
-            self._make_result("dec_standalone1", title="Choose PostgreSQL"),
-            self._make_result("dec_standalone2", title="Use Redis"),
-        ]
-
-        formatted = synthesizer_no_llm._format_records_for_prompt(results)
-
-        assert "Choose PostgreSQL" in formatted
-        assert "Use Redis" in formatted
-        assert "Phase Chain" not in formatted
-        assert "Decision Bundle" not in formatted
-        assert formatted.count("Record ") == 2
-
-    def test_mixed_grouped_and_standalone(self, synthesizer_no_llm):
-        """Test mix of grouped and standalone results"""
-        grp = "grp_2026-01-01_arch_mix"
-        results = [
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=2, title="Phase A"),
-            self._make_result("dec_p1", group_id=grp, group_type="phase_chain", phase_seq=1, phase_total=2, title="Phase B"),
-            self._make_result("dec_standalone", title="Standalone Decision"),
-        ]
-
-        formatted = synthesizer_no_llm._format_records_for_prompt(results)
-
-        assert "Phase Chain" in formatted
-        assert "Standalone Decision" in formatted
-        # 1 grouped block + 1 standalone = 2 record blocks
-        assert formatted.count("Record ") == 2
-
-    def test_phases_ordered_by_phase_seq(self, synthesizer_no_llm):
-        """Test that phases within a group are ordered by phase_seq"""
-        grp = "grp_2026-01-01_arch_order"
-        # Deliberately out of order
-        results = [
-            self._make_result("dec_p2", group_id=grp, group_type="phase_chain", phase_seq=2, phase_total=3, title="Third"),
-            self._make_result("dec_p0", group_id=grp, group_type="phase_chain", phase_seq=0, phase_total=3, title="First"),
-            self._make_result("dec_p1", group_id=grp, group_type="phase_chain", phase_seq=1, phase_total=3, title="Second"),
-        ]
-
-        formatted = synthesizer_no_llm._format_records_for_prompt(results)
-
-        # Phases should appear in seq order
-        pos_first = formatted.index("First")
-        pos_second = formatted.index("Second")
-        pos_third = formatted.index("Third")
-        assert pos_first < pos_second < pos_third
-
-
-class TestQueryProcessorMultilingual:
-    """Tests for multilingual query processing"""
-
-    @pytest.fixture
-    def processor_no_llm(self):
-        """QueryProcessor without LLM (regex fallback for all languages)"""
-        from agents.retriever.query_processor import QueryProcessor
-        return QueryProcessor()
-
-    @pytest.fixture
-    def mock_llm_processor(self):
-        """QueryProcessor with mocked LLM client"""
-        from agents.retriever.query_processor import QueryProcessor
-        import json
-
-        processor = QueryProcessor()
-
-        # Mock the LLMClient
-        mock_llm = Mock()
-        mock_llm.is_available = True
-        mock_llm.generate.return_value = json.dumps({
-            "intent": "decision_rationale",
-            "english_query": "Why did we choose PostgreSQL?",
-            "entities": ["PostgreSQL"],
-            "keywords": ["choose", "database", "postgresql"],
-            "time_scope": "all_time",
-        })
-        processor._llm = mock_llm
-
-        return processor
-
-    def test_korean_query_detected_as_non_english(self, processor_no_llm):
-        """Test that Korean query gets language=ko in ParsedQuery"""
-        result = processor_no_llm.parse("왜 PostgreSQL을 선택했나요?")
-
-        assert result.language is not None
-        assert result.language.code == "ko"
-
-    def test_english_query_detected_as_english(self, processor_no_llm):
-        """Test that English query gets language=en"""
-        result = processor_no_llm.parse("Why did we choose PostgreSQL?")
-
-        assert result.language is not None
-        assert result.language.code == "en"
-
-    def test_korean_query_falls_back_to_regex_without_llm(self, processor_no_llm):
-        """Test that Korean query uses regex path when no LLM available"""
-        result = processor_no_llm.parse("왜 PostgreSQL을 선택했나요?")
-
-        # Should still return a valid ParsedQuery
-        assert result.original == "왜 PostgreSQL을 선택했나요?"
-        assert result.intent is not None
-
-    def test_korean_query_uses_llm_when_available(self, mock_llm_processor):
-        """Test that Korean query uses LLM for intent classification"""
-        from agents.retriever.query_processor import QueryIntent
-
-        result = mock_llm_processor.parse("왜 PostgreSQL을 선택했나요?")
-
-        # LLM should be called
-        mock_llm_processor._llm.generate.assert_called_once()
-
-        assert result.intent == QueryIntent.DECISION_RATIONALE
-        assert "PostgreSQL" in result.entities
-
-    def test_multilingual_expanded_queries(self, mock_llm_processor):
-        """Test that expanded_queries includes both original and English translation"""
-        result = mock_llm_processor.parse("왜 PostgreSQL을 선택했나요?")
-
-        # Should include original Korean query AND English translation
-        assert any("PostgreSQL을" in q for q in result.expanded_queries)
-        assert any("Why" in q or "choose" in q.lower() for q in result.expanded_queries)
-
-    def test_english_query_skips_llm(self, mock_llm_processor):
-        """Test that English query does NOT use LLM even when available"""
-        result = mock_llm_processor.parse("Why did we choose PostgreSQL?")
-
-        # LLM should NOT be called for English
-        mock_llm_processor._llm.generate.assert_not_called()
-
-    def test_llm_parse_failure_falls_back(self, mock_llm_processor):
-        """Test that LLM failure falls back to regex parsing"""
-        mock_llm_processor._llm.generate.side_effect = Exception("API error")
-
-        result = mock_llm_processor.parse("왜 PostgreSQL을 선택했나요?")
-
-        # Should still return a valid ParsedQuery (regex fallback)
-        assert result is not None
-        assert result.original == "왜 PostgreSQL을 선택했나요?"
-
-    def test_parsed_query_language_field(self, processor_no_llm):
-        """Test ParsedQuery.language field is populated"""
-        result = processor_no_llm.parse("Tell me about our database")
-
-        assert result.language is not None
-        assert result.language.code == "en"
-        assert result.language.is_english is True
-
-
-class TestSynthesizerMultilingual:
-    """Tests for multilingual synthesis"""
-
-    @pytest.fixture
-    def synthesizer_no_llm(self):
-        from agents.retriever.synthesizer import Synthesizer
-        return Synthesizer(anthropic_api_key=None)
-
-    @pytest.fixture
-    def sample_results(self):
-        from agents.retriever.searcher import SearchResult
-
-        return [
-            SearchResult(
-                record_id="dec_2024-01-01_arch_postgres",
-                title="Adopt PostgreSQL",
-                payload_text="# Decision Record: Adopt PostgreSQL\n\nWe chose PostgreSQL for better JSON support.",
-                domain="architecture",
-                certainty="supported",
-                status="accepted",
-                score=0.85,
-            ),
-        ]
-
-    def test_korean_fallback_template(self, synthesizer_no_llm, sample_results):
-        """Test that Korean queries get Korean fallback template"""
-        from agents.retriever.query_processor import ParsedQuery, QueryIntent
-        from agents.common.language import LanguageInfo
-
-        query = ParsedQuery(
-            original="왜 PostgreSQL을 선택했나요?",
-            cleaned="왜 postgresql을 선택했나요?",
-            intent=QueryIntent.DECISION_RATIONALE,
-            language=LanguageInfo(code="ko", confidence=0.95, script="Hangul"),
-        )
-
-        result = synthesizer_no_llm.synthesize(query, sample_results)
-
-        assert "검색 결과" in result.answer
-
-    def test_japanese_fallback_template(self, synthesizer_no_llm, sample_results):
-        """Test that Japanese queries get Japanese fallback template"""
-        from agents.retriever.query_processor import ParsedQuery, QueryIntent
-        from agents.common.language import LanguageInfo
-
-        query = ParsedQuery(
-            original="なぜPostgreSQLを選んだのですか？",
-            cleaned="なぜpostgresqlを選んだのですか？",
-            intent=QueryIntent.DECISION_RATIONALE,
-            language=LanguageInfo(code="ja", confidence=0.90, script="Kana"),
-        )
-
-        result = synthesizer_no_llm.synthesize(query, sample_results)
-
-        assert "検索結果" in result.answer
-
-    def test_english_fallback_template(self, synthesizer_no_llm, sample_results):
-        """Test that English queries get English fallback template"""
-        from agents.retriever.query_processor import ParsedQuery, QueryIntent
-        from agents.common.language import LanguageInfo
-
-        query = ParsedQuery(
-            original="Why did we choose PostgreSQL?",
-            cleaned="why did we choose postgresql?",
-            intent=QueryIntent.DECISION_RATIONALE,
-            language=LanguageInfo(code="en", confidence=0.99, script="Latin"),
-        )
-
-        result = synthesizer_no_llm.synthesize(query, sample_results)
-
-        assert "Search Results" in result.answer
-
-    def test_unknown_language_falls_back_to_english(self, synthesizer_no_llm, sample_results):
-        """Test that unknown language code uses English template"""
-        from agents.retriever.query_processor import ParsedQuery, QueryIntent
-        from agents.common.language import LanguageInfo
-
-        query = ParsedQuery(
-            original="Warum haben wir PostgreSQL gewahlt?",
-            cleaned="warum haben wir postgresql gewahlt?",
-            intent=QueryIntent.GENERAL,
-            language=LanguageInfo(code="de", confidence=0.85, script="Latin"),
-        )
-
-        result = synthesizer_no_llm.synthesize(query, sample_results)
-
-        assert "Search Results" in result.answer
-
-    def test_no_language_uses_english_template(self, synthesizer_no_llm, sample_results):
-        """Test that ParsedQuery without language field uses English"""
-        from agents.retriever.query_processor import ParsedQuery, QueryIntent
-
-        query = ParsedQuery(
-            original="Why PostgreSQL?",
-            cleaned="why postgresql?",
-            intent=QueryIntent.GENERAL,
-            language=None,
-        )
-
-        result = synthesizer_no_llm.synthesize(query, sample_results)
-
-        assert "Search Results" in result.answer
-
-
-class TestDisplayTextLocalization:
-    """Tests for render_display_text localization"""
-
-    @pytest.fixture
-    def sample_record(self):
-        from agents.common.schemas import (
-            DecisionRecord, DecisionDetail, Context, Why, Evidence,
-            SourceRef, Quality, Payload, Domain, Sensitivity, Status,
-            Certainty, ReviewState, SourceType
-        )
-
-        return DecisionRecord(
-            id="dec_2024-02-01_arch_postgres",
-            domain=Domain.ARCHITECTURE,
-            sensitivity=Sensitivity.INTERNAL,
-            status=Status.ACCEPTED,
-            title="Adopt PostgreSQL",
-            decision=DecisionDetail(
-                what="Use PostgreSQL as primary database",
-                who=["user:alice"],
-                where="slack:#architecture",
-                when="2024-02-01",
-            ),
-            context=Context(problem="Need reliable database"),
-            why=Why(
-                rationale_summary="Better JSON support",
-                certainty=Certainty.SUPPORTED,
-            ),
-            evidence=[],
-            tags=["database"],
-            quality=Quality(scribe_confidence=0.9),
-            payload=Payload(format="markdown", text=""),
-        )
-
-    def test_render_display_text_english(self, sample_record):
-        from agents.common.schemas.templates import render_display_text
-
-        text = render_display_text(sample_record, language="en")
-
-        assert "Decision Record" in text
-        assert "Alternatives Considered" in text
-
-    def test_render_display_text_korean(self, sample_record):
-        from agents.common.schemas.templates import render_display_text
-
-        text = render_display_text(sample_record, language="ko")
-
-        assert "결정 기록" in text
-        assert "검토한 대안" in text
-
-    def test_render_display_text_japanese(self, sample_record):
-        from agents.common.schemas.templates import render_display_text
-
-        text = render_display_text(sample_record, language="ja")
-
-        assert "決定記録" in text
-        assert "検討した代替案" in text
-
-    def test_render_display_text_unknown_lang_falls_back(self, sample_record):
-        from agents.common.schemas.templates import render_display_text
-
-        text = render_display_text(sample_record, language="fr")
-
-        # Should fall back to English
-        assert "Decision Record" in text
-
-    def test_render_payload_text_always_english(self, sample_record):
-        """Verify render_payload_text is always English (for embedding consistency)"""
-        from agents.common.schemas.templates import render_payload_text
-
-        text = render_payload_text(sample_record)
-
-        assert "Decision Record" in text
-        assert "Alternatives Considered" in text
-
-
-class TestFormatAnswerForDisplay:
-    """Tests for answer formatting"""
-
-    def test_format_complete_answer(self):
-        from agents.retriever.synthesizer import SynthesizedAnswer, format_answer_for_display
-
-        answer = SynthesizedAnswer(
-            answer="PostgreSQL was chosen for better JSON support.",
-            confidence=0.85,
-            sources=[
-                {"record_id": "dec_1", "title": "PostgreSQL Decision", "certainty": "supported"}
-            ],
-            related_queries=["What alternatives were considered?"],
-            warnings=["1 record(s) have partial evidence"],
-        )
-
-        formatted = format_answer_for_display(answer)
-
-        assert "PostgreSQL was chosen" in formatted
-        assert "85%" in formatted
-        assert "dec_1" in formatted
-        assert "alternatives" in formatted.lower()
-
-
-def test_search_result_has_reusable_insight():
-    """SearchResult should carry reusable_insight from metadata."""
-    from agents.retriever.searcher import SearchResult
-
-    # Schema 2.1
-    r = SearchResult(
-        record_id="dec_test",
-        title="Test",
-        payload_text="Verbose markdown",
-        reusable_insight="Dense gist",
-        domain="architecture",
-        certainty="supported",
-        status="accepted",
-        score=0.8,
-    )
-    assert r.reusable_insight == "Dense gist"
-
-    # Schema 2.0 default
-    r2 = SearchResult(
-        record_id="dec_test2",
-        title="Test2",
-        payload_text="Verbose",
-        domain="architecture",
-        certainty="supported",
-        status="accepted",
-        score=0.8,
-    )
-    assert r2.reusable_insight == ""
diff --git a/agents/tests/test_schemas.py b/agents/tests/test_schemas.py
deleted file mode 100644
index 5266e54..0000000
--- a/agents/tests/test_schemas.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""Tests for DecisionRecord schema."""
-import pytest
-
-
-def test_reusable_insight_field():
-    """DecisionRecord should have reusable_insight field with schema 2.1."""
-    from agents.common.schemas import DecisionRecord, DecisionDetail, Domain
-
-    record = DecisionRecord(
-        id="dec_2026-03-28_architecture_test",
-        title="Test",
-        decision=DecisionDetail(what="Test decision"),
-        reusable_insight="We chose PostgreSQL over MongoDB because ACID compliance is critical.",
-    )
-    assert record.reusable_insight == "We chose PostgreSQL over MongoDB because ACID compliance is critical."
-    assert record.schema_version == "2.1"
-
-
-def test_reusable_insight_defaults_empty():
-    """reusable_insight should default to empty string for backward compat."""
-    from agents.common.schemas import DecisionRecord, DecisionDetail
-
-    record = DecisionRecord(
-        id="dec_2026-03-28_architecture_test",
-        title="Test",
-        decision=DecisionDetail(what="Test decision"),
-    )
-    assert record.reusable_insight == ""
diff --git a/agents/tests/test_team_day_scenario.py b/agents/tests/test_team_day_scenario.py
deleted file mode 100644
index 6e13349..0000000
--- a/agents/tests/test_team_day_scenario.py
+++ /dev/null
@@ -1,838 +0,0 @@
-"""
-Multi-Day Team Scenario Test
-
-Simulates a realistic 3-day workflow for a 3-person team (Alice, Bob, Charlie)
-where conversations happen sequentially across channels, decisions emerge from
-discussion threads, and cross-member recall validates organizational memory.
-
-Day 1: Architecture Sprint Planning
-  - Alice proposes database migration
-  - Bob raises performance concerns
-  - Charlie flags security requirements
-  - Team decides on PostgreSQL + encryption at rest
-  - Casual chat and status updates mixed in
-
-Day 2: Implementation Decisions
-  - Alice finalizes API design (gRPC)
-  - Bob handles feature flag rollout (LaunchDarkly)
-  - Charlie establishes security review policy
-  - Cross-references to Day 1 decisions
-
-Day 3: Incident Response + Recall
-  - Production incident triggers policy review
-  - Team recalls previous decisions to inform response
-  - Post-mortem generates new learnings
-  - Retriever validates cross-member recall
-
-Each day tests the full 3-tier pipeline:
-  Tier 1: Embedding similarity → wide net
-  Tier 2: Haiku policy filter → false positive removal
-  Tier 3: Sonnet extraction → Decision Record building
-"""
-
-import json
-import pytest
-from datetime import datetime, timedelta
-from unittest.mock import Mock
-from typing import List, Dict, Any, Optional
-from dataclasses import dataclass, field
-
-
-# ============================================================================
-# Day Scenario Data Structures
-# ============================================================================
-
-@dataclass
-class ChannelMessage:
-    """A message in a Slack channel with metadata"""
-    timestamp: str  # Simulated Slack ts
-    user: str
-    channel: str
-    text: str
-    thread_ts: Optional[str] = None  # If thread reply
-    expected_capture: bool = False
-    expected_domain: Optional[str] = None
-    note: str = ""
-
-
-@dataclass
-class DayScenario:
-    """A day's worth of team conversations"""
-    day: int
-    title: str
-    messages: List[ChannelMessage]
-    expected_captures: int = 0  # How many should be captured
-    expected_rejects: int = 0   # How many should be filtered out
-
-
-# ============================================================================
-# Tier 2 Simulation (same as test_pipeline_scenario.py)
-# ============================================================================
-
-def simulate_tier2(text: str) -> dict:
-    """Simulate Tier 2 Haiku judgment"""
-    text_lower = text.lower()
-
-    casual_signals = [
-        "good morning", "hope everyone", "heading to", "sounds good",
-        "standup in", "hey everyone", "thanks for", "happy hour",
-        "pizza", "team lunch", "conference room", "offsite",
-        "coffee", "grab lunch", "see you", "have a great",
-        "sounds like a plan", "no worries", "let me check",
-        "sure thing", "will do", "on it",
-        "sushi", "ramen", "thai food", "team dinner",
-    ]
-    for s in casual_signals:
-        if s in text_lower:
-            return {"capture": False, "reason": "Casual/social", "domain": "general"}
-
-    status_signals = [
-        "still working on", "looking into it", "updated the",
-        "merged the pr", "tests are green", "running the",
-        "was patched", "no action needed", "please update",
-        "should have it by", "currently broken", "deploying now",
-        "build passed", "build failed", "restarting the",
-        "checking the logs", "monitoring the", "seems fine now",
-        "root cause identified", "incident resolved", "error rate back to",
-        "deploying the", "will monitor",
-    ]
-    for s in status_signals:
-        if s in text_lower:
-            return {"capture": False, "reason": "Status update", "domain": "general"}
-
-    vague_signals = [
-        "maybe we should", "might be", "sometime",
-        "should look into", "just my personal", "kind of",
-        "interesting approach", "i wonder if", "not sure yet",
-        "could potentially", "thinking about",
-    ]
-    for s in vague_signals:
-        if s in text_lower:
-            return {"capture": False, "reason": "Vague opinion", "domain": "general"}
-
-    question_only = "?" in text and not any(
-        w in text_lower for w in ["decided", "agreed", "going with", "policy", "mandate", "chose"]
-    )
-    if question_only and len(text) < 150:
-        return {"capture": False, "reason": "Question without decision", "domain": "general"}
-
-    # Determine domain for captures
-    domain = "general"
-    if any(w in text_lower for w in ["security", "mtls", "encryption", "auth", "compliance", "vulnerability", "certificate", "audit"]):
-        domain = "security"
-    elif any(w in text_lower for w in ["architecture", "postgresql", "grpc", "kafka", "microservice", "typescript", "database", "schema"]):
-        domain = "architecture"
-    elif any(w in text_lower for w in ["deployment", "devops", "ci/cd", "blue-green", "rollback", "canary"]):
-        domain = "ops"
-    elif any(w in text_lower for w in ["sprint", "feature flag", "prioritiz", "customer", "product", "roadmap"]):
-        domain = "product"
-    elif any(w in text_lower for w in ["performance", "bottleneck", "latency", "benchmark", "cache", "n+1"]):
-        domain = "architecture"
-    elif any(w in text_lower for w in ["postmortem", "outage", "incident", "circuit breaker", "retry", "lesson"]):
-        domain = "ops"
-
-    return {"capture": True, "reason": "Organizational decision with rationale", "domain": domain}
-
-
-# ============================================================================
-# Day 1: Architecture Sprint Planning
-# ============================================================================
-
-DAY1 = DayScenario(
-    day=1,
-    title="Architecture Sprint Planning",
-    messages=[
-        # 09:00 — Morning casual
-        ChannelMessage(
-            timestamp="1707897600.000100",
-            user="alice", channel="#general",
-            text="Good morning team! Hope everyone had a great weekend. Ready for sprint planning?",
-            expected_capture=False, note="Casual greeting",
-        ),
-        ChannelMessage(
-            timestamp="1707897660.000200",
-            user="bob", channel="#general",
-            text="Hey everyone! Coffee is on. Let me check the sprint board real quick.",
-            expected_capture=False, note="Casual + status",
-        ),
-        ChannelMessage(
-            timestamp="1707897720.000300",
-            user="charlie", channel="#general",
-            text="Good morning! I have the security review results to share later.",
-            expected_capture=False, note="Casual greeting with mild status",
-        ),
-
-        # 09:30 — Sprint planning begins
-        ChannelMessage(
-            timestamp="1707899400.000400",
-            user="alice", channel="#architecture",
-            text='I want to kick off the database migration discussion. Our current MySQL setup is hitting scaling limits — 50ms p99 on read queries is too high for the payment service. We need to evaluate PostgreSQL vs CockroachDB for the primary datastore.',
-            expected_capture=True, expected_domain="architecture",
-            note="Problem statement with concrete metrics",
-        ),
-        ChannelMessage(
-            timestamp="1707899460.000500",
-            user="bob", channel="#architecture",
-            text="What are the actual numbers? Is 50ms really the bottleneck or is it the application layer?",
-            thread_ts="1707899400.000400",
-            expected_capture=False, note="Question without decision",
-        ),
-        ChannelMessage(
-            timestamp="1707899520.000600",
-            user="alice", channel="#architecture",
-            text='I ran benchmarks last week. MySQL p99 was 48ms, PostgreSQL on the same dataset was 12ms, CockroachDB was 18ms. "The difference is PostgreSQL\'s query planner handles our join-heavy workload much better" according to the benchmark report.',
-            thread_ts="1707899400.000400",
-            expected_capture=True, expected_domain="architecture",
-            note="Benchmark data with quoted evidence",
-        ),
-        ChannelMessage(
-            timestamp="1707899580.000700",
-            user="charlie", channel="#architecture",
-            text='Before we decide, I need to flag that any database migration must maintain encryption at rest. "Our SOC2 auditor specifically requires AES-256 for all PII data stores" — this is non-negotiable.',
-            thread_ts="1707899400.000400",
-            expected_capture=True, expected_domain="security",
-            note="Security requirement with quoted compliance mandate",
-        ),
-        ChannelMessage(
-            timestamp="1707899640.000800",
-            user="alice", channel="#architecture",
-            text='Both PostgreSQL and CockroachDB support TDE (Transparent Data Encryption). PostgreSQL uses pgcrypto extension for AES-256. Given the benchmark results and compliance requirements, I propose we go with PostgreSQL. The trade-offs are: we lose CockroachDB\'s automatic sharding, but we gain better query performance and a more mature ecosystem.',
-            thread_ts="1707899400.000400",
-            expected_capture=True, expected_domain="architecture",
-            note="Final decision with trade-off analysis",
-        ),
-        ChannelMessage(
-            timestamp="1707899700.000900",
-            user="bob", channel="#architecture",
-            text="Sounds like a plan. I'll update the sprint board.",
-            thread_ts="1707899400.000400",
-            expected_capture=False, note="Casual agreement, no additional decision",
-        ),
-
-        # 10:30 — Bob's sprint priorities
-        ChannelMessage(
-            timestamp="1707903000.001000",
-            user="bob", channel="#sprint-planning",
-            text='Sprint priorities for Q1-W3: We are prioritizing the auth refactor over the mobile redesign. Three features are blocked on the OAuth PKCE migration. "The current implementation has a known token replay vulnerability" per the security audit. Allocating 2 engineers full-time for 2 weeks.',
-            expected_capture=True, expected_domain="product",
-            note="Sprint prioritization with security rationale and resource allocation",
-        ),
-        ChannelMessage(
-            timestamp="1707903060.001100",
-            user="bob", channel="#sprint-planning",
-            text="Still working on the velocity calculations from last sprint. Should have the report by EOD.",
-            expected_capture=False, note="Status update",
-        ),
-
-        # 11:00 — Charlie's security policies
-        ChannelMessage(
-            timestamp="1707904800.001200",
-            user="charlie", channel="#security",
-            text='New policy effective immediately: All inter-service communication must use mTLS with certificates issued by our internal CA. "The penetration test found 3 services communicating over plaintext HTTP internally" which is a critical finding. Deadline: all services must be migrated by March 15.',
-            expected_capture=True, expected_domain="security",
-            note="Security policy with evidence from pentest and deadline",
-        ),
-        ChannelMessage(
-            timestamp="1707904860.001300",
-            user="charlie", channel="#security",
-            text="Running the weekly vulnerability scan now. Will share results when done.",
-            expected_capture=False, note="Routine operational task",
-        ),
-        ChannelMessage(
-            timestamp="1707904920.001400",
-            user="alice", channel="#security",
-            text="Charlie, will the mTLS requirement affect our gRPC setup? We're already using TLS for gRPC.",
-            thread_ts="1707904800.001200",
-            expected_capture=False, note="Question without decision",
-        ),
-        ChannelMessage(
-            timestamp="1707904980.001500",
-            user="charlie", channel="#security",
-            text='gRPC already uses TLS, but we need mutual TLS — both client and server must present certificates. The policy is: "Every service must present a valid x509 certificate, and every service must verify the peer certificate against our CA." This applies to gRPC, HTTP, and any custom TCP services.',
-            thread_ts="1707904800.001200",
-            expected_capture=True, expected_domain="security",
-            note="Clarification that adds concrete technical detail to the mTLS policy",
-        ),
-
-        # 12:00 — Lunch break casual
-        ChannelMessage(
-            timestamp="1707908400.001600",
-            user="bob", channel="#random",
-            text="Anyone want to grab lunch at the new ramen place?",
-            expected_capture=False, note="Casual",
-        ),
-        ChannelMessage(
-            timestamp="1707908460.001700",
-            user="alice", channel="#random",
-            text="Sure thing! I'll be ready in 5.",
-            expected_capture=False, note="Casual",
-        ),
-
-        # 14:00 — Afternoon decisions
-        ChannelMessage(
-            timestamp="1707915600.001800",
-            user="alice", channel="#engineering",
-            text='We standardized on TypeScript strict mode for all frontend code, effective today. The policy: no new JavaScript files in the monorepo. Existing JS files will be migrated over the next 3 sprints. We chose TypeScript over Flow because "TypeScript has 10x the community ecosystem and better IDE support."',
-            expected_capture=True, expected_domain="architecture",
-            note="Technology standardization with timeline and quoted rationale",
-        ),
-        ChannelMessage(
-            timestamp="1707915660.001900",
-            user="bob", channel="#engineering",
-            text="Merged the PR for the login page fix. Tests are green.",
-            expected_capture=False, note="Routine PR notification",
-        ),
-
-        # 15:00 — Vague/borderline messages
-        ChannelMessage(
-            timestamp="1707919200.002000",
-            user="bob", channel="#engineering",
-            text="I read an article about how Spotify uses feature flags at scale. Interesting approach, maybe we should look into that someday.",
-            expected_capture=False, note="Information sharing + vague suggestion",
-        ),
-        ChannelMessage(
-            timestamp="1707919260.002100",
-            user="alice", channel="#random",
-            text="We decided to order sushi for the team dinner tonight. The Thai place was closed.",
-            expected_capture=False, note="Borderline: contains 'We decided' but is food, not org decision",
-        ),
-
-        # 16:00 — Deployment policy
-        ChannelMessage(
-            timestamp="1707922800.002200",
-            user="bob", channel="#devops",
-            text='New deployment policy: All production deployments must use blue-green strategy. "Deploy to staging first, run automated smoke tests, then switch traffic" is the new standard. We chose blue-green over canary because our test coverage is high enough to catch issues pre-switch. Rollback time drops from ~30 minutes to under 10 seconds.',
-            expected_capture=True, expected_domain="ops",
-            note="Deployment policy with build-vs-buy rationale and quantified improvement",
-        ),
-    ],
-    expected_captures=9,
-    expected_rejects=13,
-)
-
-
-# ============================================================================
-# Day 2: Implementation Decisions
-# ============================================================================
-
-DAY2 = DayScenario(
-    day=2,
-    title="Implementation Decisions",
-    messages=[
-        # 09:00 — Morning standup
-        ChannelMessage(
-            timestamp="1707984000.003000",
-            user="bob", channel="#engineering",
-            text="Standup in 5 minutes! Please have your updates ready.",
-            expected_capture=False, note="Meeting reminder",
-        ),
-        ChannelMessage(
-            timestamp="1707984060.003100",
-            user="alice", channel="#engineering",
-            text="Deploying the database migration script to staging now. Will monitor for the next hour.",
-            expected_capture=False, note="Status update",
-        ),
-
-        # 09:30 — API design finalization
-        ChannelMessage(
-            timestamp="1707985800.003200",
-            user="alice", channel="#architecture",
-            text='Finalizing the API design: we will use gRPC with Protocol Buffers for all internal service-to-service communication. REST will be reserved only for the public API gateway. The key reasons: our benchmarks show gRPC is 3x faster than REST for our payload sizes, and protobuf gives us backward-compatible schema evolution. The trade-off is that browser clients need a gRPC-Web proxy, which adds one hop.',
-            expected_capture=True, expected_domain="architecture",
-            note="API design decision with benchmark rationale and trade-off",
-        ),
-        ChannelMessage(
-            timestamp="1707985860.003300",
-            user="charlie", channel="#architecture",
-            text='I want to add a requirement to the gRPC decision: all gRPC services must implement health checking per the gRPC Health Checking Protocol. This is needed for our Kubernetes readiness probes and for the mTLS certificate rotation without downtime.',
-            thread_ts="1707985800.003200",
-            expected_capture=True, expected_domain="architecture",
-            note="Additional requirement tied to Day 1 mTLS decision",
-        ),
-
-        # 10:00 — Feature flags
-        ChannelMessage(
-            timestamp="1707987600.003400",
-            user="bob", channel="#engineering",
-            text='We decided to adopt LaunchDarkly for feature flags instead of building in-house. Build-vs-buy analysis showed: building our own would take ~6 months of engineering time (2 engineers) plus ongoing maintenance. LaunchDarkly gives us targeting rules, gradual rollouts, and an audit trail out of the box. "The ROI is clear — we save $300K in engineering cost over 2 years" per our analysis.',
-            expected_capture=True, expected_domain="product",
-            note="Build-vs-buy with quantified ROI",
-        ),
-        ChannelMessage(
-            timestamp="1707987660.003500",
-            user="alice", channel="#engineering",
-            text="Not sure yet about the pricing tier. Let me check with finance and get back.",
-            thread_ts="1707987600.003400",
-            expected_capture=False, note="Vague / pending decision",
-        ),
-
-        # 11:00 — Security review process
-        ChannelMessage(
-            timestamp="1707991200.003600",
-            user="charlie", channel="#security",
-            text='After the vulnerability disclosure on the payment API last week, we are implementing a mandatory security review process. Policy: every PR that touches authentication, payment processing, or PII handling code must be reviewed by a security-certified engineer before merge. No exceptions. We chose to enforce this via GitHub CODEOWNERS rather than honor system because "automated enforcement is the only reliable enforcement" — lesson from the incident.',
-            expected_capture=True, expected_domain="security",
-            note="Security review policy triggered by incident, with enforcement mechanism and lesson learned",
-        ),
-        ChannelMessage(
-            timestamp="1707991260.003700",
-            user="charlie", channel="#security",
-            text="CVE-2024-1234 was patched in Node.js 20.11.1. We're already on 20.12.0 so no action needed.",
-            expected_capture=False, note="Informational CVE update, no decision",
-        ),
-
-        # 12:00 — Casual
-        ChannelMessage(
-            timestamp="1707994800.003800",
-            user="alice", channel="#random",
-            text="The sushi from last night was amazing! We should go there again.",
-            expected_capture=False, note="Casual",
-        ),
-
-        # 13:00 — Integration patterns
-        ChannelMessage(
-            timestamp="1707998400.003900",
-            user="alice", channel="#architecture",
-            text='For the event-driven architecture: Kafka is our standard message bus for all async communication between services. We chose Kafka over RabbitMQ because "Kafka\'s log-based architecture lets us replay events for debugging and rebuilding state after failures." The team also agreed that all events must follow a common envelope schema with correlation IDs for distributed tracing.',
-            expected_capture=True, expected_domain="architecture",
-            note="Integration pattern decision with technical rationale and team agreement",
-        ),
-
-        # 14:00 — Performance standards
-        ChannelMessage(
-            timestamp="1708002000.004000",
-            user="bob", channel="#performance",
-            text='New performance standards: All API endpoints must respond within 200ms at p95. We identified the user dashboard as the worst offender (3.2s) due to N+1 query patterns. The fix: mandatory use of DataLoader for all list/collection endpoints. "We measured a 16x improvement from 3.2s to 195ms after batching" in the dashboard endpoint. This pattern is now required for all new endpoints.',
-            expected_capture=True, expected_domain="architecture",
-            note="Performance standard with measurement data and mandatory pattern",
-        ),
-
-        # 15:00 — Encryption details
-        ChannelMessage(
-            timestamp="1708005600.004100",
-            user="charlie", channel="#security",
-            text='Encryption implementation details finalized: AES-256-GCM for data at rest (PostgreSQL TDE via pgcrypto), TLS 1.3 for data in transit. Key management: encryption keys stored in HashiCorp Vault with automatic rotation every 90 days. We chose AES-256-GCM over AES-256-CBC because GCM provides authenticated encryption — "it prevents both tampering and confidentiality breaches in a single primitive."',
-            expected_capture=True, expected_domain="security",
-            note="Encryption standard with technical rationale for algorithm choice",
-        ),
-
-        # 16:00 — Status and borderline
-        ChannelMessage(
-            timestamp="1708009200.004200",
-            user="bob", channel="#engineering",
-            text="Reminder: please update your Jira tickets before end of sprint tomorrow.",
-            expected_capture=False, note="Administrative reminder",
-        ),
-        ChannelMessage(
-            timestamp="1708009260.004300",
-            user="alice", channel="#architecture",
-            text="I wonder if we should consider GraphQL for the public API someday. Could be interesting.",
-            expected_capture=False, note="Vague musing without commitment",
-        ),
-
-        # 17:00 — Rate limiting decision
-        ChannelMessage(
-            timestamp="1708012800.004400",
-            user="charlie", channel="#security",
-            text='Rate limiting strategy decided: we will implement rate limiting at the API gateway using Kong. We chose Kong over building custom middleware because Kong provides built-in rate limiting with Redis backend, IP allowlisting, and integrates with our existing Prometheus monitoring. Configuration: 100 requests/minute for unauthenticated, 1000/minute for authenticated, 10000/minute for internal service-to-service.',
-            expected_capture=True, expected_domain="security",
-            note="Rate limiting decision with tool choice rationale and specific configuration",
-        ),
-    ],
-    expected_captures=8,
-    expected_rejects=7,
-)
-
-
-# ============================================================================
-# Day 3: Incident Response + Recall
-# ============================================================================
-
-DAY3 = DayScenario(
-    day=3,
-    title="Incident Response + Recall",
-    messages=[
-        # 02:00 — Incident alert
-        ChannelMessage(
-            timestamp="1708070400.005000",
-            user="bob", channel="#incidents",
-            text="ALERT: Payment service is returning 500 errors. Error rate at 15%. Checking the logs now.",
-            expected_capture=False, note="Alert notification + status",
-        ),
-        ChannelMessage(
-            timestamp="1708070460.005100",
-            user="bob", channel="#incidents",
-            text="Root cause identified: the retry logic in the payment service is causing cascading failures. Each failed request spawns 3 retries, which overwhelm the database connection pool.",
-            expected_capture=False, note="Investigation finding — status, not decision yet",
-        ),
-
-        # 02:30 — Incident decision
-        ChannelMessage(
-            timestamp="1708072200.005200",
-            user="alice", channel="#incidents",
-            text='Immediate fix: we are disabling retries on the payment service and implementing a circuit breaker pattern. "When error rate exceeds 50%, stop sending requests and return a cached response." We are using Resilience4j for the circuit breaker implementation. This is a permanent architectural decision, not just an incident patch.',
-            expected_capture=True, expected_domain="architecture",
-            note="Incident response decision with permanent architectural impact",
-        ),
-        ChannelMessage(
-            timestamp="1708072260.005300",
-            user="charlie", channel="#incidents",
-            text='Adding a security note to the incident: the retry storm exposed that our rate limiter was not applied to internal service-to-service calls. Updating the Kong configuration to enforce the 10000/min limit on internal traffic too. "This is consistent with our defense-in-depth policy."',
-            thread_ts="1708072200.005200",
-            expected_capture=True, expected_domain="security",
-            note="Security policy update triggered by incident",
-        ),
-
-        # 03:00 — Incident resolved
-        ChannelMessage(
-            timestamp="1708074000.005400",
-            user="bob", channel="#incidents",
-            text="Incident resolved. Payment service error rate back to 0%. Deploying the circuit breaker to all services now.",
-            expected_capture=False, note="Status update — incident resolution",
-        ),
-
-        # 09:00 — Morning after
-        ChannelMessage(
-            timestamp="1708095600.005500",
-            user="alice", channel="#general",
-            text="Good morning. Rough night with the incident. Let's make sure we do a proper postmortem.",
-            expected_capture=False, note="Casual reference to incident",
-        ),
-
-        # 10:00 — Post-mortem
-        ChannelMessage(
-            timestamp="1708099200.005600",
-            user="alice", channel="#incidents",
-            text='Post-mortem conclusion: The retry storm incident taught us three key lessons. First, never implement retries without circuit breakers — this is now a mandatory architectural pattern. Second, all retry configurations must use exponential backoff with jitter, not fixed intervals. Third, our load testing must include failure scenarios, not just happy path. "We will add chaos engineering tests to the CI pipeline" was the team decision.',
-            expected_capture=True, expected_domain="ops",
-            note="Post-mortem with 3 lessons learned and concrete action items",
-        ),
-        ChannelMessage(
-            timestamp="1708099260.005700",
-            user="bob", channel="#incidents",
-            text='From the product side: we are adding a circuit breaker health dashboard to the monitoring system. The existing Grafana dashboards will be extended with Resilience4j metrics. "Every team must be able to see their service\'s circuit breaker state in real-time." This ties into our Q1 observability goals.',
-            thread_ts="1708099200.005600",
-            expected_capture=True, expected_domain="ops",
-            note="Observability decision tied to incident response",
-        ),
-        ChannelMessage(
-            timestamp="1708099320.005800",
-            user="charlie", channel="#incidents",
-            text='Security post-mortem addition: the incident revealed that we had no alerting on anomalous internal traffic patterns. New policy: implement anomaly detection on internal service call volumes. When any service exceeds 3x its baseline call rate, trigger an automatic alert. "Anomalous traffic patterns are often the first sign of both failures and attacks."',
-            thread_ts="1708099200.005600",
-            expected_capture=True, expected_domain="security",
-            note="New monitoring policy from security perspective",
-        ),
-
-        # 11:00 — Process improvement
-        ChannelMessage(
-            timestamp="1708102800.005900",
-            user="bob", channel="#engineering",
-            text='Based on the incident, we are updating our deployment checklist. New requirement: every service deployment must include a rollback test. "We found that 40% of our services had untested rollback procedures" during the incident audit. The blue-green deployment policy from Day 1 helped us recover the payment service in under 10 seconds, validating that decision.',
-            expected_capture=True, expected_domain="ops",
-            note="Process improvement with quantified finding and reference to earlier decision",
-        ),
-
-        # 12:00 — Casual
-        ChannelMessage(
-            timestamp="1708106400.006000",
-            user="charlie", channel="#random",
-            text="After last night, I think we all deserve a long lunch. Anyone want Thai food?",
-            expected_capture=False, note="Casual",
-        ),
-        ChannelMessage(
-            timestamp="1708106460.006100",
-            user="bob", channel="#random",
-            text="Will do! Let me grab my jacket.",
-            expected_capture=False, note="Casual",
-        ),
-
-        # 14:00 — Dependency policy (emerged from incident)
-        ChannelMessage(
-            timestamp="1708113600.006200",
-            user="alice", channel="#architecture",
-            text='New architectural policy from the incident learnings: all service-to-service dependencies must declare timeout, retry, and circuit breaker configurations in a standardized config file (circuit-breaker.yaml). "No service may call another service without explicit failure handling configuration." We chose YAML over environment variables because it is version-controlled and reviewable in PRs.',
-            expected_capture=True, expected_domain="architecture",
-            note="New architectural standard emerged from incident",
-        ),
-
-        # 15:00 — Status updates
-        ChannelMessage(
-            timestamp="1708117200.006300",
-            user="bob", channel="#engineering",
-            text="Build passed on the circuit breaker integration. Deploying to staging for smoke tests.",
-            expected_capture=False, note="Status update",
-        ),
-        ChannelMessage(
-            timestamp="1708117260.006400",
-            user="alice", channel="#engineering",
-            text="Monitoring the staging deployment. Seems fine now.",
-            expected_capture=False, note="Status update",
-        ),
-
-        # 16:00 — Borderline
-        ChannelMessage(
-            timestamp="1708120800.006500",
-            user="bob", channel="#engineering",
-            text="Maybe we should think about adopting Rust for the performance-critical services. Not sure yet though.",
-            expected_capture=False, note="Vague suggestion without commitment",
-        ),
-    ],
-    expected_captures=7,
-    expected_rejects=9,
-)
-
-
-ALL_DAYS = [DAY1, DAY2, DAY3]
-
-
-# ============================================================================
-# Tests
-# ============================================================================
-
-class TestDayScenarioCompleteness:
-    """Verify test data is comprehensive and balanced"""
-
-    def test_day1_message_count(self):
-        assert len(DAY1.messages) == 22
-        captures = [m for m in DAY1.messages if m.expected_capture]
-        rejects = [m for m in DAY1.messages if not m.expected_capture]
-        assert len(captures) == DAY1.expected_captures
-        assert len(rejects) == DAY1.expected_rejects
-
-    def test_day2_message_count(self):
-        assert len(DAY2.messages) == 15
-        captures = [m for m in DAY2.messages if m.expected_capture]
-        rejects = [m for m in DAY2.messages if not m.expected_capture]
-        assert len(captures) == DAY2.expected_captures
-        assert len(rejects) == DAY2.expected_rejects
-
-    def test_day3_message_count(self):
-        assert len(DAY3.messages) == 16
-        captures = [m for m in DAY3.messages if m.expected_capture]
-        rejects = [m for m in DAY3.messages if not m.expected_capture]
-        assert len(captures) == DAY3.expected_captures
-        assert len(rejects) == DAY3.expected_rejects
-
-    def test_total_messages(self):
-        total = sum(len(d.messages) for d in ALL_DAYS)
-        assert total >= 50, f"Should have >=50 total messages, got {total}"
-
-    def test_all_members_active_each_day(self):
-        for day in ALL_DAYS:
-            users = set(m.user for m in day.messages)
-            assert "alice" in users, f"Alice missing from Day {day.day}"
-            assert "bob" in users, f"Bob missing from Day {day.day}"
-            assert "charlie" in users, f"Charlie missing from Day {day.day}"
-
-    def test_thread_discussions_present(self):
-        """At least some messages should be thread replies"""
-        thread_replies = [m for d in ALL_DAYS for m in d.messages if m.thread_ts]
-        assert len(thread_replies) >= 5, f"Should have >=5 thread replies, got {len(thread_replies)}"
-
-    def test_channels_diverse(self):
-        channels = set(m.channel for d in ALL_DAYS for m in d.messages)
-        assert len(channels) >= 5, f"Should use >=5 channels, got {channels}"
-
-
-class TestDayScenarioPipeline:
-    """Run each day's messages through the full 3-tier pipeline"""
-
-    @pytest.fixture
-    def pipeline(self):
-        from agents.scribe.tier2_filter import Tier2Filter
-        from agents.scribe.record_builder import RecordBuilder, RawEvent
-        from agents.scribe.detector import DecisionDetector
-        from agents.common.pattern_cache import PatternCache, PatternEntry
-
-        # Tier 1 mock
-        cache = Mock(spec=PatternCache)
-        cache.pattern_count = 10
-
-        def mock_match(text, threshold=0.0):
-            if len(text.strip()) < 20:
-                return (None, 0.1)
-            entry = PatternEntry(
-                text="decision pattern", category="architecture",
-                priority="high", embedding=[0.1] * 384, domain="architecture",
-            )
-            return (entry, 0.75)
-
-        cache.find_best_match.side_effect = mock_match
-        detector = DecisionDetector(cache, threshold=0.5, high_confidence_threshold=0.8)
-
-        # Tier 2 mock
-        tier2 = Tier2Filter.__new__(Tier2Filter)
-        tier2._provider = "anthropic"
-        tier2._model = "test"
-
-        mock_llm = Mock()
-        mock_llm.is_available = True
-
-        def tier2_call(prompt, **kwargs):
-            # Extract the actual text from "<message>\n...\n</message>" format
-            text = prompt.replace("<message>\n", "").split("\n</message>")[0]
-            text = text.split("\n(Tier 1")[0]
-            j = simulate_tier2(text)
-            return json.dumps(j)
-
-        mock_llm.generate.side_effect = tier2_call
-        tier2._llm = mock_llm
-
-        # Tier 3
-        builder = RecordBuilder()
-
-        return {"detector": detector, "tier2": tier2, "builder": builder}
-
-    def _process(self, pipeline, msg: ChannelMessage):
-        from agents.scribe.record_builder import RawEvent
-        from agents.common.language import detect_language
-
-        det = pipeline["detector"]
-        t2 = pipeline["tier2"]
-        bld = pipeline["builder"]
-
-        result = det.detect(msg.text)
-        if not result.is_significant:
-            return None
-
-        fr = t2.evaluate(msg.text, tier1_score=result.confidence, tier1_pattern=result.matched_pattern or "")
-        if not fr.should_capture:
-            return None
-
-        if fr.domain != "general" and result.domain in (None, "general"):
-            result.domain = fr.domain
-
-        raw = RawEvent(
-            text=msg.text, user=msg.user, channel=msg.channel,
-            timestamp=msg.timestamp, source="slack", thread_ts=msg.thread_ts,
-        )
-        lang = detect_language(msg.text)
-        return bld.build(raw, result, language=lang)
-
-    def test_day1_pipeline(self, pipeline):
-        """Day 1 messages processed correctly"""
-        for msg in DAY1.messages:
-            record = self._process(pipeline, msg)
-            if msg.expected_capture:
-                assert record is not None, f"Day 1 SHOULD CAPTURE: {msg.text[:60]}... ({msg.note})"
-                assert len(record.payload.text) > 50
-            else:
-                assert record is None, f"Day 1 SHOULD REJECT: {msg.text[:60]}... ({msg.note})"
-
-    def test_day2_pipeline(self, pipeline):
-        """Day 2 messages processed correctly"""
-        for msg in DAY2.messages:
-            record = self._process(pipeline, msg)
-            if msg.expected_capture:
-                assert record is not None, f"Day 2 SHOULD CAPTURE: {msg.text[:60]}... ({msg.note})"
-                assert len(record.payload.text) > 50
-            else:
-                assert record is None, f"Day 2 SHOULD REJECT: {msg.text[:60]}... ({msg.note})"
-
-    def test_day3_pipeline(self, pipeline):
-        """Day 3 messages processed correctly"""
-        for msg in DAY3.messages:
-            record = self._process(pipeline, msg)
-            if msg.expected_capture:
-                assert record is not None, f"Day 3 SHOULD CAPTURE: {msg.text[:60]}... ({msg.note})"
-                assert len(record.payload.text) > 50
-            else:
-                assert record is None, f"Day 3 SHOULD REJECT: {msg.text[:60]}... ({msg.note})"
-
-    def test_overall_capture_statistics(self, pipeline):
-        """Verify overall stats across all 3 days"""
-        total = 0
-        captured = 0
-        rejected = 0
-
-        for day in ALL_DAYS:
-            for msg in day.messages:
-                total += 1
-                record = self._process(pipeline, msg)
-                if record:
-                    captured += 1
-                else:
-                    rejected += 1
-
-        expected_total_captures = sum(d.expected_captures for d in ALL_DAYS)
-        assert captured == expected_total_captures, (
-            f"Expected {expected_total_captures} total captures, got {captured}"
-        )
-
-        # Capture rate should be 30-55%
-        rate = captured / total
-        assert 0.30 <= rate <= 0.55, f"Capture rate {rate:.1%} out of expected range"
-
-    def test_captured_records_have_valid_structure(self, pipeline):
-        """All captured records should have proper Decision Record structure"""
-        for day in ALL_DAYS:
-            for msg in day.messages:
-                if not msg.expected_capture:
-                    continue
-                record = self._process(pipeline, msg)
-                assert record is not None
-
-                # Structural validation
-                assert record.id, f"Missing ID: {msg.text[:40]}"
-                assert record.title, f"Missing title: {msg.text[:40]}"
-                assert record.payload.text, f"Missing payload.text: {msg.text[:40]}"
-                assert record.payload.format == "markdown"
-                assert record.evidence, f"Missing evidence: {msg.text[:40]}"
-                assert record.why.certainty, f"Missing certainty: {msg.text[:40]}"
-                assert record.domain, f"Missing domain: {msg.text[:40]}"
-
-                # payload.text should be markdown, not JSON
-                assert not record.payload.text.strip().startswith("{")
-                assert "# " in record.payload.text or "## " in record.payload.text
-
-
-class TestCrossTeamRecallDay:
-    """Test that Day 1-2 decisions can be recalled on Day 3"""
-
-    def test_postmortem_references_earlier_decisions(self):
-        """Post-mortem messages should reference earlier architecture/deployment decisions"""
-        # Day 3 post-mortem mentions circuit breakers (new) and
-        # references blue-green deployment policy (Day 1)
-        postmortem = DAY3.messages[9]  # Bob's deployment checklist update
-        assert "blue-green" in postmortem.text.lower() or "rollback" in postmortem.text.lower()
-        assert postmortem.expected_capture is True
-
-    def test_incident_triggers_policy_updates(self):
-        """Day 3 incident should generate new policies (not just status)"""
-        day3_captures = [m for m in DAY3.messages if m.expected_capture]
-        # Should have multiple capture-worthy messages from the incident
-        assert len(day3_captures) >= 5, (
-            f"Incident response should generate >=5 captured decisions/learnings, got {len(day3_captures)}"
-        )
-
-        # Different members should contribute to incident response
-        incident_users = set(m.user for m in day3_captures)
-        assert len(incident_users) >= 2, "Multiple team members should contribute to incident response"
-
-    def test_day3_recalls_day1_encryption(self):
-        """Day 3 security decisions should be consistent with Day 1 encryption requirements"""
-        day1_security = [m for m in DAY1.messages if m.expected_capture and m.expected_domain == "security"]
-        day3_security = [m for m in DAY3.messages if m.expected_capture and m.expected_domain == "security"]
-
-        assert len(day1_security) >= 2, "Day 1 should establish security baseline"
-        assert len(day3_security) >= 1, "Day 3 should have security updates"
-
-    def test_query_processor_handles_incident_queries(self):
-        """Common incident-related recall queries should parse correctly"""
-        from agents.retriever.query_processor import QueryProcessor, QueryIntent
-
-        qp = QueryProcessor()
-
-        incident_queries = [
-            ("Why did we choose the circuit breaker pattern?", QueryIntent.DECISION_RATIONALE),
-            ("What's our process for deploying to production?", QueryIntent.PATTERN_LOOKUP),
-            ("What are the security requirements for internal traffic?", QueryIntent.SECURITY_COMPLIANCE),
-        ]
-
-        for query_text, expected_intent in incident_queries:
-            parsed = qp.parse(query_text)
-            assert parsed.intent == expected_intent, (
-                f"Query '{query_text}' should have intent {expected_intent.value}, got {parsed.intent.value}"
-            )
-
diff --git a/agents/tests/test_tier2_filter.py b/agents/tests/test_tier2_filter.py
deleted file mode 100644
index a05f2b3..0000000
--- a/agents/tests/test_tier2_filter.py
+++ /dev/null
@@ -1,268 +0,0 @@
-"""
-Tests for Tier 2 LLM Filter
-
-Tests the Haiku-based policy evaluator that filters Tier 1 candidates.
-"""
-
-import json
-import pytest
-from unittest.mock import Mock, MagicMock, patch
-
-
-class TestFilterResult:
-    """Tests for FilterResult dataclass"""
-
-    def test_capture_result(self):
-        from agents.scribe.tier2_filter import FilterResult
-
-        result = FilterResult(
-            should_capture=True,
-            reason="Contains a concrete technology decision with rationale",
-            domain="architecture",
-        )
-
-        assert result.should_capture is True
-        assert "decision" in result.reason
-        assert result.domain == "architecture"
-
-    def test_reject_result(self):
-        from agents.scribe.tier2_filter import FilterResult
-
-        result = FilterResult(
-            should_capture=False,
-            reason="Casual greeting, no decision content",
-            domain="general",
-        )
-
-        assert result.should_capture is False
-
-
-class TestTier2Filter:
-    """Tests for Tier2Filter"""
-
-    @pytest.fixture
-    def mock_anthropic_response(self):
-        """Create a mock Anthropic API response"""
-        def _make_response(capture: bool, reason: str, domain: str = "general"):
-            response = Mock()
-            content_block = Mock()
-            content_block.text = json.dumps({
-                "capture": capture,
-                "reason": reason,
-                "domain": domain,
-            })
-            response.content = [content_block]
-            return response
-        return _make_response
-
-    @pytest.fixture
-    def filter_with_mock(self, mock_anthropic_response):
-        """Create a Tier2Filter with mocked LLM client"""
-        from agents.scribe.tier2_filter import Tier2Filter
-
-        f = Tier2Filter.__new__(Tier2Filter)
-        f._provider = "anthropic"
-        f._model = "claude-haiku-4-5-20251001"
-
-        mock_llm = Mock()
-        mock_llm.is_available = True
-        mock_llm.generate.return_value = json.dumps({
-            "capture": True,
-            "reason": "Contains a concrete architecture decision",
-            "domain": "architecture",
-        })
-        f._llm = mock_llm
-        return f
-
-    def test_evaluate_captures_decision(self, filter_with_mock, mock_anthropic_response):
-        """Test that a real decision is captured"""
-        filter_with_mock._llm.generate.return_value = json.dumps({
-            "capture": True,
-            "reason": "Concrete technology choice with rationale",
-            "domain": "architecture",
-        })
-
-        result = filter_with_mock.evaluate(
-            "We decided to use PostgreSQL instead of MongoDB because we need ACID compliance",
-            tier1_score=0.75,
-            tier1_pattern="We decided to use X instead of Y because...",
-        )
-
-        assert result.should_capture is True
-        assert result.domain == "architecture"
-
-    def test_evaluate_rejects_casual(self, filter_with_mock, mock_anthropic_response):
-        """Test that casual chat is rejected"""
-        filter_with_mock._llm.generate.return_value = json.dumps({
-            "capture": False,
-            "reason": "Casual greeting with no decision content",
-            "domain": "general",
-        })
-
-        result = filter_with_mock.evaluate(
-            "Good morning everyone! How was your weekend?",
-            tier1_score=0.55,
-        )
-
-        assert result.should_capture is False
-
-    def test_evaluate_rejects_vague(self, filter_with_mock, mock_anthropic_response):
-        """Test that vague opinions are rejected"""
-        filter_with_mock._llm.generate.return_value = json.dumps({
-            "capture": False,
-            "reason": "Vague opinion without commitment or concrete decision",
-            "domain": "general",
-        })
-
-        result = filter_with_mock.evaluate(
-            "Maybe we should consider using a different database sometime",
-            tier1_score=0.52,
-        )
-
-        assert result.should_capture is False
-
-    def test_evaluate_captures_policy(self, filter_with_mock, mock_anthropic_response):
-        """Test that policy statements are captured"""
-        filter_with_mock._llm.generate.return_value = json.dumps({
-            "capture": True,
-            "reason": "Establishes a team security policy",
-            "domain": "security",
-        })
-
-        result = filter_with_mock.evaluate(
-            "All API keys must be rotated every 90 days, no exceptions",
-            tier1_score=0.68,
-        )
-
-        assert result.should_capture is True
-        assert result.domain == "security"
-
-    def test_evaluate_fallback_on_unavailable(self):
-        """Test fallback when LLM is unavailable"""
-        from agents.scribe.tier2_filter import Tier2Filter
-
-        f = Tier2Filter.__new__(Tier2Filter)
-        f._provider = "anthropic"
-        f._model = "claude-haiku-4-5-20251001"
-        mock_llm = Mock()
-        mock_llm.is_available = False
-        f._llm = mock_llm
-
-        assert f.is_available is False
-
-        result = f.evaluate("Some text")
-        # Fail-closed: reject on unavailable to avoid noise
-        assert result.should_capture is False
-        assert "unavailable" in result.reason.lower()
-
-    def test_evaluate_fallback_on_error(self, filter_with_mock):
-        """Test fallback when LLM call fails"""
-        filter_with_mock._llm.generate.side_effect = Exception("API error")
-
-        result = filter_with_mock.evaluate("Some text")
-        # Fail-closed: reject on error to avoid noise
-        assert result.should_capture is False
-        assert "error" in result.reason.lower()
-
-    def test_parse_response_valid_json(self):
-        """Test parsing valid JSON response"""
-        from agents.scribe.tier2_filter import Tier2Filter
-
-        f = Tier2Filter.__new__(Tier2Filter)
-        f._provider = "anthropic"
-        f._model = "test"
-        mock_llm = Mock()
-        mock_llm.is_available = False
-        f._llm = mock_llm
-
-        result = f._parse_response('{"capture": true, "reason": "test", "domain": "ops"}')
-        assert result.should_capture is True
-        assert result.domain == "ops"
-
-    def test_parse_response_with_markdown_fences(self):
-        """Test parsing response wrapped in markdown code fences"""
-        from agents.scribe.tier2_filter import Tier2Filter
-
-        f = Tier2Filter.__new__(Tier2Filter)
-        f._provider = "anthropic"
-        f._model = "test"
-        mock_llm = Mock()
-        mock_llm.is_available = False
-        f._llm = mock_llm
-
-        result = f._parse_response('```json\n{"capture": false, "reason": "not relevant", "domain": "general"}\n```')
-        assert result.should_capture is False
-
-    def test_parse_response_invalid_json(self):
-        """Test fallback on invalid JSON"""
-        from agents.scribe.tier2_filter import Tier2Filter
-
-        f = Tier2Filter.__new__(Tier2Filter)
-        f._provider = "anthropic"
-        f._model = "test"
-        mock_llm = Mock()
-        mock_llm.is_available = False
-        f._llm = mock_llm
-
-        result = f._parse_response("This is not JSON at all")
-        # Fail-closed: reject on parse failure to avoid noise
-        assert result.should_capture is False
-
-    def test_system_prompt_content(self):
-        """Test that the policy prompt covers key concepts"""
-        from agents.scribe.tier2_filter import FILTER_POLICY
-
-        # Should mention what to capture
-        assert "decision" in FILTER_POLICY.lower()
-        assert "policy" in FILTER_POLICY.lower()
-        assert "trade-off" in FILTER_POLICY.lower()
-
-        # Should mention what NOT to capture
-        assert "casual" in FILTER_POLICY.lower()
-        assert "status update" in FILTER_POLICY.lower()
-        assert "vague" in FILTER_POLICY.lower()
-
-    def test_text_truncation(self, filter_with_mock, mock_anthropic_response):
-        """Test that very long messages are truncated"""
-        filter_with_mock._llm.generate.return_value = json.dumps({
-            "capture": True, "reason": "test", "domain": "general",
-        })
-
-        long_text = "x" * 1000
-        filter_with_mock.evaluate(long_text)
-
-        # Verify the call was made with truncated text
-        call_args = filter_with_mock._llm.generate.call_args
-        user_msg = call_args.args[0]
-        # Message prefix "<message>\n" + 500 chars max + "\n</message>"
-        assert len(user_msg) <= 600
-
-
-class TestTier2Integration:
-    """Integration tests for the 3-tier pipeline flow"""
-
-    def test_tier1_pass_tier2_reject(self):
-        """Tier 1 passes but Tier 2 correctly rejects false positive"""
-        from agents.scribe.tier2_filter import FilterResult
-
-        # Simulate: "We decided to order pizza" triggers Tier 1 ("We decided")
-        # but Tier 2 should reject it as non-organizational
-        tier2_result = FilterResult(
-            should_capture=False,
-            reason="Food order, not organizational decision",
-        )
-
-        assert tier2_result.should_capture is False
-
-    def test_tier1_pass_tier2_pass(self):
-        """Both tiers agree on a real decision"""
-        from agents.scribe.tier2_filter import FilterResult
-
-        tier2_result = FilterResult(
-            should_capture=True,
-            reason="Concrete database technology choice with ACID rationale",
-            domain="architecture",
-        )
-
-        assert tier2_result.should_capture is True
-        assert tier2_result.domain == "architecture"
diff --git a/mcp/.env.example b/mcp/.env.example
deleted file mode 100644
index ab327e2..0000000
--- a/mcp/.env.example
+++ /dev/null
@@ -1,24 +0,0 @@
-# MCP server name
-MCP_SERVER_NAME="envector_mcp_server"
-
-# enVector connection
-ENVECTOR_ENDPOINT="runestone-xxx.clusters.envector.io"
-ENVECTOR_API_KEY=""
-
-# enVector options
-ENVECTOR_KEY_ID="vault-key"
-ENVECTOR_KEY_PATH="./keys"
-ENVECTOR_EVAL_MODE="rmp"
-ENVECTOR_ENCRYPTED_QUERY="false"
-ENVECTOR_AUTO_KEY_SETUP="true"
-
-# Rune-Vault integration (optional)
-# Set these to fetch public keys from Rune-Vault at startup
-RUNEVAULT_ENDPOINT=""
-RUNEVAULT_TOKEN=""
-# Optional: explicit gRPC target (auto-derived from RUNEVAULT_ENDPOINT if omitted)
-# RUNEVAULT_GRPC_TARGET="vault-host:50051"
-
-# Embedding mode
-EMBEDDING_MODE="femb"
-EMBEDDING_MODEL="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
diff --git a/mcp/adapter/__init__.py b/mcp/adapter/__init__.py
deleted file mode 100644
index 90d30d7..0000000
--- a/mcp/adapter/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from .envector_sdk import EnVectorSDKAdapter
-from .embeddings import EmbeddingAdapter
-from .document_preprocess import DocumentPreprocessingAdapter
-from .vault_client import (
-    VaultClient,
-    VaultError,
-    DecryptResult,
-    create_vault_client,
-)
-
-__all__ = [
-    "EmbeddingAdapter",
-    "EnVectorSDKAdapter",
-    "DocumentPreprocessingAdapter",
-    "VaultClient",
-    "VaultError",
-    "DecryptResult",
-    "create_vault_client",
-]
diff --git a/mcp/adapter/document_preprocess.py b/mcp/adapter/document_preprocess.py
deleted file mode 100644
index 57f0866..0000000
--- a/mcp/adapter/document_preprocess.py
+++ /dev/null
@@ -1,166 +0,0 @@
-from pathlib import Path
-from dataclasses import dataclass
-from typing import Dict, Any, List, Optional
-
-from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
-from pypdf import PdfReader
-
-from logging import getLogger
-logger = getLogger(__name__)
-
-SUPPORTED_LANG = [e.value for e in Language]
-EXT_PATTERN = {
-    "PYTHON": ["*.py"],
-    "DOCUMENT": ["*.md", "*.mdx"],
-}
-CHUNK_OPTS = {
-    "PYTHON": {"chunk_size": 800, "chunk_overlap": 200},
-    "DOCUMENT": {"chunk_size": 1000, "chunk_overlap": 200},
-}
-
-@dataclass
-class DocumentFile:
-    path: str
-    content: str
-
-
-class DocumentPreprocessingAdapter:
-    """
-    Adapter for document preprocessing using LangChain.
-    """
-    def __init__(self) -> None:
-        pass
-
-    def preprocess_document_from_text(
-        self,
-        texts: List[str],
-    ) -> List[Dict[str, Any]]:
-        """
-        Preprocess documents from the given text inputs
-        """
-        # check language support
-        language = self._check_language_supported(language="DOCUMENT")
-        # Load documents from the given files path
-        documents = self._load_documents_from_text(texts)
-        # get text splitter
-        splitter = self._get_splitter(language)
-        # Chunk documents
-        chunks = self._chunk_documents(documents, splitter)
-        return chunks
-
-    def preprocess_documents_from_path(
-        self,
-        path: str,
-        language: Optional[str] = None,
-    ) -> List[Dict[str, Any]]:
-        """
-        Preprocess documents from the given path
-        """
-        # check language support
-        language = self._check_language_supported(language)
-        # Load documents from the given files path
-        documents = self._load_documents_from_path(path, language)
-        # get text splitter
-        splitter = self._get_splitter(language)
-        # Chunk documents
-        chunks = self._chunk_documents(documents, splitter)
-        return chunks
-
-    def _check_language_supported(self, language: Optional[str] = None) -> str:
-        if language is None:
-            language = "DOCUMENT"
-        language = language.upper()
-        if language not in EXT_PATTERN.keys():
-            raise ValueError(f"Unsupported language for document preprocessing: {language}")
-        return language
-
-    def _load_documents_from_text(self, texts: List[str]) -> List[DocumentFile]:
-        doc_files = [
-            DocumentFile(path=f"input_text_{idx}", content=text)
-            for idx, text in enumerate(texts)
-        ]
-        logger.info(f"{len(doc_files)} text document loaded")
-        return doc_files
-
-    def _load_documents_from_path(self, path: str, language: Optional[str] = None) -> List[DocumentFile]:
-        root = Path(path)
-        doc_files: List[DocumentFile] = []
-
-        patterns = EXT_PATTERN[language]
-
-        if root.suffix == ".pdf":
-            reader = PdfReader(str(root))
-            doc_files = []
-
-            for i, page in enumerate(reader.pages):
-                try:
-                    text = page.extract_text() or ""
-                except Exception as e:
-                    text = f"[Error reading page {i}: {e}]"
-                doc_files.append(DocumentFile(path=f"{root.name}::page-{i}", content=text))
-
-        else:
-            for pattern in patterns:
-                for file_path in root.glob(pattern):
-                    if any(part.startswith(".") for part in file_path.parts):
-                        continue
-
-                    try:
-                        text = file_path.read_text(encoding="utf-8")
-                    except UnicodeDecodeError:
-                        text = file_path.read_text(encoding="utf-8", errors="ignore")
-
-                    rel_path = str(file_path.relative_to(root))
-                    doc_files.append(DocumentFile(path=rel_path, content=text))
-
-        logger.info(f"{len(doc_files)} document files loaded")
-
-        return doc_files
-
-    def _get_splitter(
-        self,
-        language: str = None,
-    ) -> RecursiveCharacterTextSplitter:
-        """
-        Get text splitter based on language
-        """
-        chunk_kwargs = CHUNK_OPTS[language]
-        if language == "DOCUMENT":
-            return RecursiveCharacterTextSplitter(
-                **chunk_kwargs
-            )
-
-        splitter = RecursiveCharacterTextSplitter.from_language(
-            language=getattr(Language, language),
-            **chunk_kwargs
-        )
-
-        return splitter
-
-    def _chunk_documents(
-        self,
-        document_files: List[DocumentFile],
-        splitter: RecursiveCharacterTextSplitter,
-    ) -> List[Dict[str, Any]]:
-        """
-        Create chunks from Document of Python code files
-        """
-        chunks: List[Dict[str, Any]] = []
-
-        for code_file in document_files:
-            split_texts = splitter.split_text(code_file.content)
-
-            for idx, chunk_text in enumerate(split_texts):
-                chunk = {
-                    "id": f"{code_file.path}::chunk-{idx}",
-                    "text": chunk_text,
-                    "metadata": {
-                        "source": code_file.path,
-                        "chunk_index": idx,
-                    },
-                }
-                chunks.append(chunk)
-
-        logger.info(f"{len(chunks)} chunks created from documents")
-
-        return chunks
diff --git a/mcp/adapter/embeddings.py b/mcp/adapter/embeddings.py
deleted file mode 100644
index 0cdc201..0000000
--- a/mcp/adapter/embeddings.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from typing import List, Union
-
-import numpy as np
-
-
-class EmbeddingAdapter:
-    """
-    General Adapter for various embedding SDK interactions.
-    """
-    def __init__(self, mode: str, model_name: str) -> None:
-        self.mode = mode
-        self.model_name = model_name
-
-        if mode in ["fastembed", "femb"]:
-            self.adapter = FastEmbedSDKAdapter(model_name)
-        elif mode in ["sbert", "sentence_transformer"]:
-            self.adapter = SBERTSDKAdapter(model_name)
-        elif mode in ["huggingface", "hf"]:
-            self.adapter = HuggingFaceSDKAdapter(model_name)
-        elif mode == "openai":
-            self.adapter = OpenAISDKAdapter(model_name)
-        else:
-            raise ValueError(f"Unsupported embedding mode: {mode}")
-
-    def get_embedding(self, texts: List[str]) -> Union[List[float], List[List[float]], np.ndarray]:
-        """
-        Retrieves embeddings for a list of texts using the specified SDK.
-
-        Args:
-            texts (List[str]): A list of texts to embed.
-
-        Returns:
-            np.ndarray: List of embeddings where each row corresponds to the embedding of a text
-        """
-        embeddings = self.adapter.get_embedding(texts)
-
-        # l2 normalize
-        embeddings = self._normalize_embeddings(np.array(embeddings))
-        assert embeddings.shape[0] == len(texts)
-        return embeddings.tolist()
-
-    def _normalize_embeddings(self, embeddings: np.ndarray) -> np.ndarray:
-        # l2 normalize and guard against zero vectors
-        norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
-        epsilon = 1e-12
-        norm = np.maximum(norm, epsilon)
-        embeddings = embeddings / norm
-        return embeddings
-
-
-class FastEmbedSDKAdapter:
-    """
-    Adapter for FastEmbed SDK interactions.
-    """
-    def __init__(self, model_name: str = "fastembed/fastembed-base") -> None:
-        """
-        Initializes the FastEmbedSDKAdapter with the provided model name.
-
-        Args:
-            model_name (str): The name of the FastEmbed model to use.
-        """
-
-        from fastembed import TextEmbedding
-
-        self.model = TextEmbedding(model_name)
-
-
-    def get_embedding(self, texts: List[str]) -> Union[List[float], List[List[float]], np.ndarray]:
-        """
-        Retrieves the embedding for the given text using FastEmbed SDK.
-        """
-        embeddings = list(self.model.embed(texts))
-        return embeddings
-
-
-class SBERTSDKAdapter:
-    """
-    Adapter for SBERT (Sentence Transformer) SDK interactions.
-    """
-    def __init__(self, model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") -> None:
-        """
-        Initializes the SBERTSDKAdapter with the provided model name.
-
-        Args:
-            model_name (str): The name of the Sentence Transformer model to use.
-        """
-
-        from sentence_transformers import SentenceTransformer
-
-        self.model = SentenceTransformer(model_name, trust_remote_code=True)
-
-
-    def get_embedding(self, texts: List[str]) -> Union[List[float], List[List[float]], np.ndarray]:
-        """
-        Retrieves the embedding for the given text using Sentence Transformer SDK.
-        """
-        return self.model.encode(texts)
-
-
-class HuggingFaceSDKAdapter:
-    """
-    Adapter for HuggingFace SDK interactions.
-    """
-    def __init__(self, model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", cache_dir: str = None) -> None:
-        """
-        Initializes the HuggingFaceSDKAdapter with the provided model name and cache directory.
-
-        Args:
-            model_name (str): The name of the HuggingFace model to use.
-            cache_dir (str): The directory to cache the model.
-        """
-
-        from transformers import AutoTokenizer, AutoModel
-
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-        self.model = AutoModel.from_pretrained(model_name, cache_dir=cache_dir)
-
-    def get_embedding(self, texts: List[str]) -> Union[List[float], List[List[float]], np.ndarray]:
-        """
-        Retrieves the embedding for the given text using HuggingFace SDK.
-        """
-        encoded_input = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=512)
-        embeddings = self.model(**encoded_input).last_hidden_state[:, 0, :]
-        return embeddings.detach().numpy()
-
-
-class OpenAISDKAdapter:
-    """
-    Adapter for OpenAI API interactions.
-    """
-    def __init__(self, model_name: str) -> None:
-        """
-        Initializes the OpenAISDKAdapter with the provided model name.
-
-        Args:
-            model_name (str): The OpenAI model name.
-        """
-
-        import openai
-
-        self.model_name = model_name
-        self.client = openai.OpenAI()
-
-    def get_embedding(self, texts: List[str]) -> Union[List[float], List[List[float]], np.ndarray]:
-        """
-        Retrieves embeddings for a list of texts using OpenAI API.
-        """
-        response = self.client.embeddings.create(
-            input=texts,
-            model=self.model_name,
-            encoding_format="float",
-        )
-        outputs = np.array([e.embedding for e in response.data])
-        return outputs
diff --git a/mcp/adapter/envector_sdk.py b/mcp/adapter/envector_sdk.py
deleted file mode 100644
index d9fc746..0000000
--- a/mcp/adapter/envector_sdk.py
+++ /dev/null
@@ -1,386 +0,0 @@
-# Summary of file: enVector SDK Adapter(enVector APIs Caller)
-
-from typing import Union, List, Dict, Any
-import base64
-import json
-import logging
-import os
-import numpy as np
-import pyenvector as ev  # pip install pyenvector
-from pyenvector.crypto.block import CipherBlock
-from pyenvector.crypto.parameter import KeyParameter
-from google.protobuf.json_format import MessageToDict
-
-from pathlib import Path
-
-logger = logging.getLogger("rune.adapter")
-
-SCRIPT_DIR = Path(__file__).parent.resolve()
-KEY_PATH = SCRIPT_DIR.parent.parent / "keys" # Manage keys directory at project root
-
-# ---------------------------------------------------------------------------
-# Vault-model safety patches for pyenvector KeyParameter
-#
-# In the Vault security model SecKey and MetadataKey never leave Vault,
-# so the local .json files do not exist.  pyenvector's KeyParameter
-# properties call utils.get_key_stream(path) which falls through to
-# ast.literal_eval(path_string) and raises SyntaxError when the file
-# is missing.
-#
-# The patches return None for missing key files, allowing Cipher to
-# initialise in encrypt-only mode — exactly what insert operations need.
-# ---------------------------------------------------------------------------
-_original_sec_key_fget = KeyParameter.sec_key.fget
-_original_sec_key_path_fget = KeyParameter.sec_key_path.fget
-_original_metadata_key_fget = KeyParameter.metadata_key.fget
-_original_metadata_key_path_fget = KeyParameter.metadata_key_path.fget
-
-def _safe_sec_key_getter(self):
-    """Return None when SecKey.json is absent instead of crashing."""
-    if getattr(self, 'sec_key_stream', None):
-        return _original_sec_key_fget(self)
-    path = _original_sec_key_path_fget(self)
-    if path and not os.path.exists(path):
-        return None
-    return _original_sec_key_fget(self)
-
-def _safe_sec_key_path_getter(self):
-    """Return None when SecKey.json is absent so Cipher skips decryptor init."""
-    path = _original_sec_key_path_fget(self)
-    if path and not os.path.exists(path):
-        return None
-    return path
-
-def _safe_metadata_key_getter(self):
-    """Return None when MetadataKey.json is absent instead of crashing."""
-    if getattr(self, 'metadata_key_stream', None):
-        return _original_metadata_key_fget(self)
-    path = _original_metadata_key_path_fget(self)
-    if path and not os.path.exists(path):
-        return None
-    return _original_metadata_key_fget(self)
-
-def _safe_metadata_key_path_getter(self):
-    """Return None when MetadataKey.json is absent so Cipher skips metadata encryption."""
-    path = _original_metadata_key_path_fget(self)
-    if path and not os.path.exists(path):
-        return None
-    return path
-
-_original_metadata_encryption_fget = KeyParameter.metadata_encryption.fget
-
-def _safe_metadata_encryption_getter(self):
-    """Return False when MetadataKey.json is absent (app-layer handles encryption)."""
-    if not _original_metadata_encryption_fget(self):
-        return False
-    # If metadata_encryption is True but key file is missing, override to False
-    path = _original_metadata_key_path_fget(self)
-    if path and not os.path.exists(path):
-        return False
-    return True
-
-KeyParameter.sec_key = property(_safe_sec_key_getter, KeyParameter.sec_key.fset)
-KeyParameter.sec_key_path = property(_safe_sec_key_path_getter)
-KeyParameter.metadata_key = property(_safe_metadata_key_getter, KeyParameter.metadata_key.fset)
-KeyParameter.metadata_key_path = property(_safe_metadata_key_path_getter)
-KeyParameter.metadata_encryption = property(_safe_metadata_encryption_getter, KeyParameter.metadata_encryption.fset)
-
-# gRPC error messages related to dead/stale connection
-CONNECTION_ERROR_PATTERNS = (
-    "UNAVAILABLE",
-    "DEADLINE_EXCEEDED",
-    "Connection refused",
-    "Connection reset",
-    "Stream removed",
-    "RST_STREAM",
-    "Broken pipe",
-    "Transport closed",
-    "Socket closed",
-    "EOF",
-    "failed to connect",
-)
-
-
-class EnVectorSDKAdapter:
-    """
-    Adapter class to interact with the enVector SDK.
-    """
-    def __init__(
-            self,
-            address: str,
-            key_id: str,
-            key_path: str,
-            eval_mode: str,
-            query_encryption: bool,
-            access_token: str = None,
-            auto_key_setup: bool = True,
-            agent_id: str = None,
-            agent_dek: bytes = None,
-        ):
-        """
-        Initializes the EnVectorSDKAdapter with an optional endpoint.
-
-        Args:
-            address (str): The endpoint URL for the enVector SDK.
-            key_id (str): The key identifier for the enVector SDK.
-            key_path (str): The path to the key files.
-            eval_mode (str): The evaluation mode for the enVector SDK.
-            query_encryption (bool): Whether to encrypt the query vectors.
-            access_token (str, optional): The access token for the enVector SDK.
-            auto_key_setup (bool): If True, generates keys automatically when not found.
-                                   Set to False when keys are provided externally (e.g., from Vault).
-            agent_id (str): Per-agent identifier for app-layer metadata encryption.
-            agent_dek (bytes): Per-agent AES-256 DEK (32 bytes) for metadata encryption.
-        """
-        if not key_path:
-            key_path = str(KEY_PATH)
-        self.query_encryption = query_encryption
-        self._agent_id = agent_id
-        self._agent_dek = agent_dek
-
-        # Store init params for reinitialization on connection loss
-        self._init_params = {
-            "address": address,
-            "key_path": key_path,
-            "key_id": key_id,
-            "eval_mode": eval_mode,
-            "auto_key_setup": auto_key_setup,
-            "access_token": access_token,
-        }
-
-        ev.init(**self._init_params)
-
-    #--------------- Get Index List --------------#
-    def call_get_index_list(self) -> Dict[str, Any]:
-        """
-        Calls the enVector SDK to get the list of indexes.
-
-        Returns:
-            Dict[str, Any]: If succeed, converted format of the index list. Otherwise, error message.
-        """
-        try:
-            results = self.invoke_get_index_list()
-            return self._to_json_available({"ok": True, "results": results})
-        except Exception as e:
-            # Handle exceptions and return an appropriate error message
-            return {"ok": False, "error": repr(e)}
-
-    #--------------- Connection resilience helpers --------------#
-
-    @staticmethod
-    def _is_connection_error(exc: Exception) -> bool:
-        if isinstance(exc, (ConnectionError, OSError)):
-            return True
-        msg = str(exc)
-        return any(pattern in msg for pattern in CONNECTION_ERROR_PATTERNS)
-
-    def _reinit(self) -> None:
-        logger.warning(
-            "enVector connection lost - reconnecting to %s ...",
-            self._init_params["address"],
-        )
-        ev.init(**self._init_params)
-        logger.info("enVector reconnection complete.")
-
-    def _with_reconnect(self, fn):
-        try:
-            return fn()
-        except Exception as exc:
-            if not self._is_connection_error(exc):
-                raise
-            logger.warning(
-                "enVector operation failed (%s: %s). Attempting reconnect...",
-                type(exc).__name__, exc,
-            )
-            self._reinit()
-            return fn()
-
-    def invoke_get_index_list(self) -> List[str]:
-        """
-        Invokes the enVector SDK's get_index_list functionality.
-
-        Returns:
-            List[str]: List of index names from the enVector SDK.
-        """
-        return self._with_reconnect(ev.get_index_list)
-
-    #------------------- Insert ------------------#
-
-    def call_insert(self, index_name: str, vectors: List[List[float]], metadata: List[Any] = None):
-        """
-        Calls the enVector SDK to perform an insert operation.
-
-        Args:
-            vectors (List[List[float]]): The list of vectors to insert.
-            metadata (List[Any], optional): The list of metadata associated with the vectors. Defaults to None.
-
-        Returns:
-            Dict[str, Any]: If succeed, converted format of the insert results. Otherwise, error message.
-        """
-        try:
-            results = self.invoke_insert(index_name=index_name, vectors=vectors, metadata=metadata)
-            return self._to_json_available({"ok": True, "results": results})
-        except Exception as e:
-            # Handle exceptions and return an appropriate error message
-            return {"ok": False, "error": repr(e)}
-
-    def _app_encrypt_metadata(self, metadata_str: str) -> str:
-        """
-        App-layer metadata encryption using per-agent DEK.
-        Returns JSON: {"a": "<agent_id>", "c": "<base64_ciphertext>"}
-        """
-        from pyenvector.utils.aes import encrypt_metadata as aes_encrypt
-        ct = aes_encrypt(metadata_str, self._agent_dek)
-        return json.dumps({"a": self._agent_id, "c": ct})
-
-    def invoke_insert(self, index_name: str, vectors: List[List[float]], metadata: List[Any] = None):
-        """
-        Invokes the enVector SDK's insert functionality.
-
-        Args:
-            index_name (str): The name of the index to insert into.
-            vectors (Union[List[List[float]], List[CipherBlock]]): The list of vectors to insert.
-            metadata (List[Any], optional): The list of metadata associated with the vectors. Defaults to None.
-
-        Returns:
-            Any: Raw insert results from the enVector SDK.
-        """
-        # App-layer metadata encryption with per-agent DEK
-        if self._agent_dek and metadata:
-            if not self._agent_id:
-                logger.warning("agent_dek is set but agent_id is missing — skipping metadata encryption")
-            else:
-                metadata = [self._app_encrypt_metadata(m) for m in metadata]
-
-        def _do_insert():
-            index = ev.Index(index_name) # Create an index instance with the given index name
-            # Insert vectors with optional metadata
-            return index.insert(data=vectors, metadata=metadata) # Return list of inserted vectors' IDs
-
-        return self._with_reconnect(_do_insert)
-
-    #------------------- Scoring (Vault-Secured Pipeline) ------------------#
-
-    def call_score(
-        self, index_name: str, query: Union[List[float], List[List[float]]]
-    ) -> Dict[str, Any]:
-        """
-        Query against the encrypted index and returns the result ciphertext for Vault decryption.
-
-        Args:
-            index_name: Index to search.
-            query: Query vector(s).
-
-        Returns:
-            Dict with ok, encrypted_blobs (List[str] of base64-encoded CiphertextScore protobuf), or error.
-        """
-        def _do_score():
-            index = ev.Index(index_name)
-            scores = index.scoring(query)  # List[CipherBlock] with is_score=True
-            encoded_blobs = []
-            for cb in scores:
-                # Serialize the CiphertextScore protobuf and encode to base64
-                serialized = cb.data.SerializeToString()
-                encoded_blob = base64.b64encode(serialized).decode('utf-8')
-                encoded_blobs.append(encoded_blob)
-            return {"ok": True, "encrypted_blobs": encoded_blobs}
-
-        try:
-            return self._with_reconnect(_do_score)
-        except Exception as e:
-            return {"ok": False, "error": repr(e)}
-
-    def call_remind(
-        self,
-        index_name: str,
-        indices: List[Dict[str, Any]],
-        output_fields: List[str] = None,
-    ) -> Dict[str, Any]:
-        """
-        Retrieves metadata for indices returned by Vault after decryption.
-
-        Args:
-            index_name: Index to fetch metadata from.
-            indices: List of dicts with "shard_idx", "row_idx", "score".
-            output_fields: Fields to include (default: ["metadata"]).
-
-        Returns:
-            Dict with ok, results (List[dict]), or error.
-        """
-        if output_fields is None:
-            output_fields = ["metadata"]
-
-        # Pre-validate before network call
-        idx_list = []
-        for entry in indices:
-            row_idx = entry.get("row_idx")
-            if row_idx is None:
-                raise ValueError("Missing required 'row_idx' in index entry: " + repr(entry))
-            idx_list.append(
-                {
-                    "shard_idx": entry.get("shard_idx", 0),
-                    "row_idx": row_idx,
-                }
-            )
-
-        def _do_remind():
-            index = ev.Index(index_name)
-            results = index.indexer.get_metadata(
-                index_name=index_name,
-                idx=idx_list,
-                fields=output_fields,
-            )
-            # Convert protobuf Metadata objects to dicts and attach scores
-            results_with_scores = []
-            for i, entry in enumerate(indices):
-                if i < len(results):
-                    metadata_obj = results[i]
-                    # Protobuf objects: use MessageToDict for proper field extraction
-                    if hasattr(metadata_obj, 'ListFields'):
-                        result_dict = MessageToDict(metadata_obj, preserving_proto_field_name=True)
-                    elif hasattr(metadata_obj, '_asdict'):
-                        result_dict = metadata_obj._asdict()
-                    elif hasattr(metadata_obj, '__dict__'):
-                        result_dict = metadata_obj.__dict__.copy()
-                    else:
-                        result_dict = {"metadata": str(metadata_obj)}
-
-                    # Attach score from Vault
-                    result_dict["score"] = entry.get("score", 0.0)
-                    results_with_scores.append(result_dict)
-            return self._to_json_available({"ok": True, "results": results_with_scores})
-
-        try:
-            return self._with_reconnect(_do_remind)
-        except Exception as e:
-            return {"ok": False, "error": repr(e)}
-
-    @staticmethod
-    def _to_json_available(obj: Any) -> Any:
-        """
-        Converts an object to a JSON-serializable format if possible.
-
-        Args:
-            obj (Any): The object to convert.
-
-        Returns:
-            Any: The JSON-serializable representation of the object, or the original object if conversion is not possible.
-        """
-        if obj is None or isinstance(obj, (str, int, float, bool)):
-            return obj
-        if isinstance(obj, dict):
-            return {str(k): EnVectorSDKAdapter._to_json_available(v) for k, v in obj.items()}
-        if isinstance(obj, (list, tuple, set)):
-            return [EnVectorSDKAdapter._to_json_available(item) for item in obj]
-        for attr in ("model_dump", "dict", "to_dict"):
-            if hasattr(obj, attr):
-                try:
-                    return EnVectorSDKAdapter._to_json_available(getattr(obj, attr)())
-                except Exception:
-                    pass
-        if hasattr(obj, "__dict__"):
-            try:
-                return {k: EnVectorSDKAdapter._to_json_available(v) for k, v in obj.__dict__.items() if not k.startswith("_")}
-            except Exception:
-                pass
-        return repr(obj)
diff --git a/mcp/adapter/vault_client.py b/mcp/adapter/vault_client.py
deleted file mode 100644
index e7a1ce7..0000000
--- a/mcp/adapter/vault_client.py
+++ /dev/null
@@ -1,380 +0,0 @@
-"""
-Vault Client for Rune-Vault gRPC Integration
-
-This client handles communication between envector-mcp-server and Rune-Vault.
-All decryption operations are delegated to Vault, which holds the secret key.
-
-Uses grpc.aio for async communication with Vault's gRPC server.
-Maintains a persistent channel (connection pooled internally by grpcio).
-
-Security Model:
-- MCP server NEVER has access to secret key
-- All decryption requests go through Vault
-- Audit trail maintained by Vault
-"""
-
-import os
-import json
-import logging
-from typing import Dict, Any, Optional, List
-from dataclasses import dataclass
-from urllib.parse import urlparse
-
-import grpc
-import grpc.aio
-import httpx
-
-logger = logging.getLogger(__name__)
-
-# Import generated stubs
-from .vault_proto import vault_service_pb2 as pb2
-from .vault_proto import vault_service_pb2_grpc as pb2_grpc
-
-MAX_MESSAGE_LENGTH = 256 * 1024 * 1024  # 256 MB (EvalKey can be tens of MB)
-
-
-@dataclass
-class DecryptResult:
-    """Result from Vault decryption of the result ciphertext."""
-    ok: bool
-    results: List[Dict[str, Any]]  # [{shard_idx: int, row_idx: int, score: float}, ...]
-    error: Optional[str] = None
-
-    @classmethod
-    def from_vault_response(cls, raw: Any) -> "DecryptResult":
-        """
-        Parse response from non-gRPC paths (kept for backward compatibility).
-        The gRPC path constructs DecryptResult directly from typed ScoreEntry messages.
-        """
-        if isinstance(raw, list):
-            return cls(ok=True, results=raw)
-        if isinstance(raw, dict) and "error" in raw:
-            return cls(ok=False, results=[], error=raw["error"])
-        raise VaultError(f"Unexpected vault response format: {type(raw).__name__}: {raw}")
-
-
-class VaultError(Exception):
-    """Error communicating with Vault."""
-    pass
-
-
-class VaultClient:
-    """
-    Async gRPC client for Rune-Vault decryption service.
-
-    Maintains a persistent gRPC channel (connection pooled internally by grpcio).
-    The channel is created lazily on first use.
-
-    Usage:
-        client = VaultClient(
-            vault_endpoint="http://vault:50080/mcp",
-            vault_token="your-token"
-        )
-        result = await client.decrypt_search_results(
-            encrypted_blob_b64="base64...",
-            top_k=5
-        )
-        await client.close()
-    """
-
-    def __init__(
-        self,
-        vault_endpoint: str,
-        vault_token: str,
-        timeout: float = 30.0,
-        ca_cert: Optional[str] = None,
-        tls_disable: bool = False,
-    ):
-        """
-        Initialize Vault client.
-
-        Args:
-            vault_endpoint: Vault gRPC target. Accepts multiple formats:
-                "host:port" (direct), "tcp://host:port", or legacy
-                "http://host:50080/mcp". RUNEVAULT_GRPC_TARGET overrides.
-            vault_token: Authentication token for Vault
-            timeout: Request timeout in seconds
-            ca_cert: Path to CA certificate PEM file for self-signed certs.
-                None or empty string uses system CA bundle.
-            tls_disable: If True, use insecure plaintext channel (dev only).
-        """
-        self.vault_endpoint = vault_endpoint.rstrip("/")
-        self.vault_token = vault_token
-        self.timeout = timeout
-        self._ca_cert = ca_cert
-        self._tls_disable = tls_disable
-
-        # Derive gRPC target from endpoint URL or use explicit override
-        self._grpc_target = os.getenv("RUNEVAULT_GRPC_TARGET")
-        if not self._grpc_target:
-            self._grpc_target = self._derive_grpc_target(self.vault_endpoint)
-
-        # Lazy channel creation (created on first use in async context)
-        self._channel: Optional[grpc.aio.Channel] = None
-        self._stub: Optional[pb2_grpc.VaultServiceStub] = None
-
-    @staticmethod
-    def _derive_grpc_target(endpoint: str) -> str:
-        """
-        Derive gRPC host:port from endpoint string.
-
-        Accepts multiple formats:
-            "host:port"                    -> "host:port"       (direct gRPC target)
-            "tcp://host:port"              -> "host:port"       (tcp scheme stripped)
-            "http://vault:50080/mcp"       -> "vault:50051"     (legacy HTTP, port replaced)
-            "https://vault.example.com"    -> "vault.example.com:50051"
-        """
-        parsed = urlparse(endpoint)
-
-        # Direct gRPC target: no scheme or tcp:// scheme
-        if not parsed.scheme or parsed.scheme == "tcp":
-            host = parsed.hostname or endpoint.split(":")[0].split("/")[0]
-            port = parsed.port
-            if port:
-                return f"{host}:{port}"
-            # bare hostname without port — assume default gRPC port
-            return f"{host}:50051"
-
-        # Legacy HTTP/HTTPS endpoint — extract host, use gRPC default port
-        host = parsed.hostname or endpoint.split(":")[0].split("/")[0]
-        return f"{host}:50051"
-
-    def _build_tls_credentials(self) -> grpc.ChannelCredentials:
-        """Build TLS channel credentials.
-
-        If _ca_cert is set, reads the PEM file for custom CA verification.
-        Otherwise, uses the system default CA bundle (grpc default).
-        """
-        root_certs = None
-        if self._ca_cert:
-            cert_path = os.path.expanduser(self._ca_cert)
-            if not os.path.isfile(cert_path):
-                raise VaultError(
-                    f"CA certificate file not found: {cert_path}. "
-                    "Check VAULT_CA_CERT or vault.ca_cert in config.json."
-                )
-            with open(cert_path, "rb") as f:
-                root_certs = f.read()
-            logger.info(f"Using custom CA certificate: {cert_path}")
-        else:
-            logger.info("Using system CA bundle for TLS verification")
-        return grpc.ssl_channel_credentials(root_certificates=root_certs)
-
-    def _ensure_channel(self):
-        """Create the async gRPC channel if not yet created."""
-        if self._channel is None:
-            options = [
-                ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH),
-                ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH),
-            ]
-            if self._tls_disable:
-                logger.warning(
-                    "TLS disabled — gRPC traffic is unencrypted. "
-                    "Only use this for local development."
-                )
-                self._channel = grpc.aio.insecure_channel(
-                    self._grpc_target, options=options,
-                )
-            else:
-                credentials = self._build_tls_credentials()
-                self._channel = grpc.aio.secure_channel(
-                    self._grpc_target, credentials, options=options,
-                )
-            self._stub = pb2_grpc.VaultServiceStub(self._channel)
-
-    async def close(self):
-        """Close the gRPC channel."""
-        if self._channel is not None:
-            await self._channel.close()
-            self._channel = None
-            self._stub = None
-
-    async def get_public_key(self) -> dict:
-        """
-        Fetch the public key bundle via gRPC.
-
-        Returns:
-            Parsed dict: {"EncKey.json": "...", "EvalKey.json": "...", "index_name": "..."}
-
-        Raises:
-            VaultError: If the call fails
-        """
-        self._ensure_channel()
-        try:
-            request = pb2.GetPublicKeyRequest(token=self.vault_token)
-            response = await self._stub.GetPublicKey(
-                request, timeout=self.timeout
-            )
-            if response.error:
-                raise VaultError(f"GetPublicKey failed: {response.error}")
-            try:
-                return json.loads(response.key_bundle_json)
-            except (json.JSONDecodeError, ValueError) as e:
-                raise VaultError("GetPublicKey returned invalid JSON") from e
-        except grpc.aio.AioRpcError as e:
-            raise VaultError(f"gRPC GetPublicKey failed: {e.code()} {e.details()}")
-
-    async def decrypt_search_results(
-        self,
-        encrypted_blob_b64: str,
-        top_k: int = 5,
-    ) -> DecryptResult:
-        """
-        Decrypt result ciphertext from encrypted similarity search.
-
-        Args:
-            encrypted_blob_b64: Base64-encoded result ciphertext from
-                encrypted similarity search on enVector Cloud
-            top_k: Number of top results to return (max 10)
-
-        Returns:
-            DecryptResult with top-k indices and similarity values
-
-        Raises:
-            VaultError: If the call fails
-        """
-        self._ensure_channel()
-        try:
-            request = pb2.DecryptScoresRequest(
-                token=self.vault_token,
-                encrypted_blob_b64=encrypted_blob_b64,
-                top_k=top_k,
-            )
-            response = await self._stub.DecryptScores(
-                request, timeout=self.timeout
-            )
-            if response.error:
-                return DecryptResult(ok=False, results=[], error=response.error)
-
-            results = [
-                {
-                    "shard_idx": entry.shard_idx,
-                    "row_idx": entry.row_idx,
-                    "score": entry.score,
-                }
-                for entry in response.results
-            ]
-            return DecryptResult(ok=True, results=results)
-        except grpc.aio.AioRpcError as e:
-            raise VaultError(
-                f"gRPC DecryptScores failed: {e.code()} {e.details()}"
-            )
-
-    async def decrypt_metadata(
-        self,
-        encrypted_metadata_list: List[str],
-    ) -> List:
-        """
-        Decrypt AES-encrypted metadata via Vault.
-
-        Args:
-            encrypted_metadata_list: List of Base64-encoded encrypted metadata strings.
-
-        Returns:
-            List of decrypted metadata objects (dicts, strings, etc.)
-
-        Raises:
-            VaultError: If the call fails
-        """
-        self._ensure_channel()
-        try:
-            request = pb2.DecryptMetadataRequest(
-                token=self.vault_token,
-                encrypted_metadata_list=encrypted_metadata_list,
-            )
-            response = await self._stub.DecryptMetadata(
-                request, timeout=self.timeout
-            )
-            if response.error:
-                raise VaultError(f"DecryptMetadata failed: {response.error}")
-
-            # Parse each JSON string back to Python object
-            try:
-                return [json.loads(s) for s in response.decrypted_metadata]
-            except (json.JSONDecodeError, ValueError) as e:
-                raise VaultError("DecryptMetadata returned invalid JSON in metadata entry") from e
-        except grpc.aio.AioRpcError as e:
-            raise VaultError(
-                f"gRPC DecryptMetadata failed: {e.code()} {e.details()}"
-            )
-
-    async def health_check(self) -> bool:
-        """
-        Check if Vault is reachable.
-        Tries gRPC health check first, falls back to HTTP /health.
-        """
-        # Try gRPC health check
-        try:
-            self._ensure_channel()
-            from grpc_health.v1 import health_pb2 as health_proto
-            from grpc_health.v1 import health_pb2_grpc as health_grpc
-            health_stub = health_grpc.HealthStub(self._channel)
-            resp = await health_stub.Check(
-                health_proto.HealthCheckRequest(service=""),
-                timeout=5.0,
-            )
-            return resp.status == health_proto.HealthCheckResponse.SERVING
-        except Exception:
-            pass
-
-        # Fallback to HTTP /health (only if endpoint looks like an HTTP URL)
-        parsed = urlparse(self.vault_endpoint)
-        if parsed.scheme in ("http", "https"):
-            try:
-                verify = False if self._tls_disable else (self._ca_cert or True)
-                async with httpx.AsyncClient(timeout=httpx.Timeout(5.0), verify=verify) as client:
-                    base_url = self.vault_endpoint
-                    for suffix in ("/mcp", "/sse"):
-                        if base_url.endswith(suffix):
-                            base_url = base_url[:-len(suffix)]
-                            break
-                    response = await client.get(f"{base_url}/health")
-                    return response.status_code == 200
-            except Exception as e:
-                logger.warning(f"Vault health check failed: {e}")
-
-        logger.warning("Vault health check failed: gRPC unreachable")
-        return False
-
-
-def create_vault_client(
-    vault_endpoint: Optional[str] = None,
-    vault_token: Optional[str] = None,
-    ca_cert: Optional[str] = None,
-    tls_disable: bool = False,
-) -> Optional[VaultClient]:
-    """
-    Factory function to create Vault client from environment variables.
-
-    Environment variables:
-    - RUNEVAULT_ENDPOINT: Vault gRPC target (e.g., "vault:50051" or "tcp://host:port")
-    - RUNEVAULT_TOKEN: Authentication token for Vault
-    - RUNEVAULT_GRPC_TARGET: Optional explicit gRPC target override
-    - VAULT_CA_CERT: Path to CA certificate PEM (for self-signed certs)
-    - VAULT_TLS_DISABLE: Set to "true" to use insecure plaintext channel
-
-    Args:
-        vault_endpoint: Override for RUNEVAULT_ENDPOINT
-        vault_token: Override for RUNEVAULT_TOKEN
-        ca_cert: Override for VAULT_CA_CERT
-        tls_disable: Override for VAULT_TLS_DISABLE
-
-    Returns:
-        VaultClient if configured, None otherwise
-    """
-    endpoint = vault_endpoint or os.getenv("RUNEVAULT_ENDPOINT")
-    token = vault_token or os.getenv("RUNEVAULT_TOKEN")
-
-    if not endpoint or not token:
-        logger.info("Rune-Vault not configured (RUNEVAULT_ENDPOINT or RUNEVAULT_TOKEN missing)")
-        return None
-
-    resolved_ca_cert = ca_cert or os.getenv("VAULT_CA_CERT") or None
-    resolved_tls_disable = tls_disable or os.getenv("VAULT_TLS_DISABLE", "").lower() == "true"
-
-    return VaultClient(
-        vault_endpoint=endpoint,
-        vault_token=token,
-        ca_cert=resolved_ca_cert,
-        tls_disable=resolved_tls_disable,
-    )
diff --git a/mcp/adapter/vault_proto/__init__.py b/mcp/adapter/vault_proto/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/mcp/adapter/vault_proto/vault_service_pb2.py b/mcp/adapter/vault_proto/vault_service_pb2.py
deleted file mode 100644
index 83043e7..0000000
--- a/mcp/adapter/vault_proto/vault_service_pb2.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler.  DO NOT EDIT!
-# NO CHECKED-IN PROTOBUF GENCODE
-# source: vault_service.proto
-# Protobuf Python Version: 5.29.0
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import runtime_version as _runtime_version
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-_runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC,
-    5,
-    29,
-    0,
-    '',
-    'vault_service.proto'
-)
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13vault_service.proto\x12\rrune.vault.v1\"$\n\x13GetPublicKeyRequest\x12\r\n\x05token\x18\x01 \x01(\t\">\n\x14GetPublicKeyResponse\x12\x17\n\x0fkey_bundle_json\x18\x01 \x01(\t\x12\r\n\x05\x65rror\x18\x02 \x01(\t\"P\n\x14\x44\x65\x63ryptScoresRequest\x12\r\n\x05token\x18\x01 \x01(\t\x12\x1a\n\x12\x65ncrypted_blob_b64\x18\x02 \x01(\t\x12\r\n\x05top_k\x18\x03 \x01(\x05\"?\n\nScoreEntry\x12\x11\n\tshard_idx\x18\x01 \x01(\x05\x12\x0f\n\x07row_idx\x18\x02 \x01(\x05\x12\r\n\x05score\x18\x03 \x01(\x01\"R\n\x15\x44\x65\x63ryptScoresResponse\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.rune.vault.v1.ScoreEntry\x12\r\n\x05\x65rror\x18\x02 \x01(\t\"H\n\x16\x44\x65\x63ryptMetadataRequest\x12\r\n\x05token\x18\x01 \x01(\t\x12\x1f\n\x17\x65ncrypted_metadata_list\x18\x02 \x03(\t\"D\n\x17\x44\x65\x63ryptMetadataResponse\x12\x1a\n\x12\x64\x65\x63rypted_metadata\x18\x01 \x03(\t\x12\r\n\x05\x65rror\x18\x02 \x01(\t2\xa5\x02\n\x0cVaultService\x12W\n\x0cGetPublicKey\x12\".rune.vault.v1.GetPublicKeyRequest\x1a#.rune.vault.v1.GetPublicKeyResponse\x12Z\n\rDecryptScores\x12#.rune.vault.v1.DecryptScoresRequest\x1a$.rune.vault.v1.DecryptScoresResponse\x12`\n\x0f\x44\x65\x63ryptMetadata\x12%.rune.vault.v1.DecryptMetadataRequest\x1a&.rune.vault.v1.DecryptMetadataResponseb\x06proto3')
-
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'vault_service_pb2', _globals)
-if not _descriptor._USE_C_DESCRIPTORS:
-  DESCRIPTOR._loaded_options = None
-  _globals['_GETPUBLICKEYREQUEST']._serialized_start=38
-  _globals['_GETPUBLICKEYREQUEST']._serialized_end=74
-  _globals['_GETPUBLICKEYRESPONSE']._serialized_start=76
-  _globals['_GETPUBLICKEYRESPONSE']._serialized_end=138
-  _globals['_DECRYPTSCORESREQUEST']._serialized_start=140
-  _globals['_DECRYPTSCORESREQUEST']._serialized_end=220
-  _globals['_SCOREENTRY']._serialized_start=222
-  _globals['_SCOREENTRY']._serialized_end=285
-  _globals['_DECRYPTSCORESRESPONSE']._serialized_start=287
-  _globals['_DECRYPTSCORESRESPONSE']._serialized_end=369
-  _globals['_DECRYPTMETADATAREQUEST']._serialized_start=371
-  _globals['_DECRYPTMETADATAREQUEST']._serialized_end=443
-  _globals['_DECRYPTMETADATARESPONSE']._serialized_start=445
-  _globals['_DECRYPTMETADATARESPONSE']._serialized_end=513
-  _globals['_VAULTSERVICE']._serialized_start=516
-  _globals['_VAULTSERVICE']._serialized_end=809
-# @@protoc_insertion_point(module_scope)
diff --git a/mcp/adapter/vault_proto/vault_service_pb2_grpc.py b/mcp/adapter/vault_proto/vault_service_pb2_grpc.py
deleted file mode 100644
index ee4dafd..0000000
--- a/mcp/adapter/vault_proto/vault_service_pb2_grpc.py
+++ /dev/null
@@ -1,195 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-import warnings
-
-from . import vault_service_pb2 as vault__service__pb2
-
-GRPC_GENERATED_VERSION = '1.71.2'
-GRPC_VERSION = grpc.__version__
-_version_not_supported = False
-
-try:
-    from grpc._utilities import first_version_is_lower
-    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
-except ImportError:
-    _version_not_supported = True
-
-if _version_not_supported:
-    raise RuntimeError(
-        f'The grpc package installed is at version {GRPC_VERSION},'
-        + f' but the generated code in vault_service_pb2_grpc.py depends on'
-        + f' grpcio>={GRPC_GENERATED_VERSION}.'
-        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
-        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
-    )
-
-
-class VaultServiceStub(object):
-    """Rune-Vault gRPC service.
-    Holds the FHE secret key and performs all decryption operations.
-    Phase 1: dual-stack alongside FastMCP HTTP/SSE (port 50080).
-    """
-
-    def __init__(self, channel):
-        """Constructor.
-
-        Args:
-            channel: A grpc.Channel.
-        """
-        self.GetPublicKey = channel.unary_unary(
-                '/rune.vault.v1.VaultService/GetPublicKey',
-                request_serializer=vault__service__pb2.GetPublicKeyRequest.SerializeToString,
-                response_deserializer=vault__service__pb2.GetPublicKeyResponse.FromString,
-                _registered_method=True)
-        self.DecryptScores = channel.unary_unary(
-                '/rune.vault.v1.VaultService/DecryptScores',
-                request_serializer=vault__service__pb2.DecryptScoresRequest.SerializeToString,
-                response_deserializer=vault__service__pb2.DecryptScoresResponse.FromString,
-                _registered_method=True)
-        self.DecryptMetadata = channel.unary_unary(
-                '/rune.vault.v1.VaultService/DecryptMetadata',
-                request_serializer=vault__service__pb2.DecryptMetadataRequest.SerializeToString,
-                response_deserializer=vault__service__pb2.DecryptMetadataResponse.FromString,
-                _registered_method=True)
-
-
-class VaultServiceServicer(object):
-    """Rune-Vault gRPC service.
-    Holds the FHE secret key and performs all decryption operations.
-    Phase 1: dual-stack alongside FastMCP HTTP/SSE (port 50080).
-    """
-
-    def GetPublicKey(self, request, context):
-        """Returns the public key bundle (EncKey, EvalKey, optional team index name).
-        """
-        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-        context.set_details('Method not implemented!')
-        raise NotImplementedError('Method not implemented!')
-
-    def DecryptScores(self, request, context):
-        """Decrypts FHE-encrypted similarity scores and applies Top-K filtering.
-        """
-        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-        context.set_details('Method not implemented!')
-        raise NotImplementedError('Method not implemented!')
-
-    def DecryptMetadata(self, request, context):
-        """Decrypts a list of AES-encrypted metadata strings.
-        """
-        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-        context.set_details('Method not implemented!')
-        raise NotImplementedError('Method not implemented!')
-
-
-def add_VaultServiceServicer_to_server(servicer, server):
-    rpc_method_handlers = {
-            'GetPublicKey': grpc.unary_unary_rpc_method_handler(
-                    servicer.GetPublicKey,
-                    request_deserializer=vault__service__pb2.GetPublicKeyRequest.FromString,
-                    response_serializer=vault__service__pb2.GetPublicKeyResponse.SerializeToString,
-            ),
-            'DecryptScores': grpc.unary_unary_rpc_method_handler(
-                    servicer.DecryptScores,
-                    request_deserializer=vault__service__pb2.DecryptScoresRequest.FromString,
-                    response_serializer=vault__service__pb2.DecryptScoresResponse.SerializeToString,
-            ),
-            'DecryptMetadata': grpc.unary_unary_rpc_method_handler(
-                    servicer.DecryptMetadata,
-                    request_deserializer=vault__service__pb2.DecryptMetadataRequest.FromString,
-                    response_serializer=vault__service__pb2.DecryptMetadataResponse.SerializeToString,
-            ),
-    }
-    generic_handler = grpc.method_handlers_generic_handler(
-            'rune.vault.v1.VaultService', rpc_method_handlers)
-    server.add_generic_rpc_handlers((generic_handler,))
-    server.add_registered_method_handlers('rune.vault.v1.VaultService', rpc_method_handlers)
-
-
- # This class is part of an EXPERIMENTAL API.
-class VaultService(object):
-    """Rune-Vault gRPC service.
-    Holds the FHE secret key and performs all decryption operations.
-    Phase 1: dual-stack alongside FastMCP HTTP/SSE (port 50080).
-    """
-
-    @staticmethod
-    def GetPublicKey(request,
-            target,
-            options=(),
-            channel_credentials=None,
-            call_credentials=None,
-            insecure=False,
-            compression=None,
-            wait_for_ready=None,
-            timeout=None,
-            metadata=None):
-        return grpc.experimental.unary_unary(
-            request,
-            target,
-            '/rune.vault.v1.VaultService/GetPublicKey',
-            vault__service__pb2.GetPublicKeyRequest.SerializeToString,
-            vault__service__pb2.GetPublicKeyResponse.FromString,
-            options,
-            channel_credentials,
-            insecure,
-            call_credentials,
-            compression,
-            wait_for_ready,
-            timeout,
-            metadata,
-            _registered_method=True)
-
-    @staticmethod
-    def DecryptScores(request,
-            target,
-            options=(),
-            channel_credentials=None,
-            call_credentials=None,
-            insecure=False,
-            compression=None,
-            wait_for_ready=None,
-            timeout=None,
-            metadata=None):
-        return grpc.experimental.unary_unary(
-            request,
-            target,
-            '/rune.vault.v1.VaultService/DecryptScores',
-            vault__service__pb2.DecryptScoresRequest.SerializeToString,
-            vault__service__pb2.DecryptScoresResponse.FromString,
-            options,
-            channel_credentials,
-            insecure,
-            call_credentials,
-            compression,
-            wait_for_ready,
-            timeout,
-            metadata,
-            _registered_method=True)
-
-    @staticmethod
-    def DecryptMetadata(request,
-            target,
-            options=(),
-            channel_credentials=None,
-            call_credentials=None,
-            insecure=False,
-            compression=None,
-            wait_for_ready=None,
-            timeout=None,
-            metadata=None):
-        return grpc.experimental.unary_unary(
-            request,
-            target,
-            '/rune.vault.v1.VaultService/DecryptMetadata',
-            vault__service__pb2.DecryptMetadataRequest.SerializeToString,
-            vault__service__pb2.DecryptMetadataResponse.FromString,
-            options,
-            channel_credentials,
-            insecure,
-            call_credentials,
-            compression,
-            wait_for_ready,
-            timeout,
-            metadata,
-            _registered_method=True)
diff --git a/mcp/requirements.txt b/mcp/requirements.txt
deleted file mode 100644
index 825cbc0..0000000
--- a/mcp/requirements.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-pydantic[email]>=2.11.7
-python-dotenv>=1.2.1
-fastmcp>=2.2.0
-pyenvector>=1.2.0
-pytest>=7.0.0
-pytest-asyncio>=0.18.0
-fastembed>=0.7.4
-langchain-text-splitters>=1.0.0
-pypdf>=6.4.1
diff --git a/mcp/server/__init__.py b/mcp/server/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/mcp/server/errors.py b/mcp/server/errors.py
deleted file mode 100644
index 5101d8a..0000000
--- a/mcp/server/errors.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""
-Custom exception for the Rune MCP server.
-
-Agents programmatically decide whether to retry, reconfigure, or report the failure
-
-Error response format:
-    {
-        "ok": false,
-        "error": {
-            "code": "ERROR_CODE",
-            "message": "...",
-            "retryable": true,
-            "recovery_hint": "..."
-        }
-    }
-"""
-
-
-class RuneError(Exception):
-    code: str = "INTERNAL_ERROR"
-    retryable: bool = False
-    recovery_hint: str = ""
-
-    def __init__(self, message: str = "", *, code: str = None, retryable: bool = None, recovery_hint: str = None):
-        super().__init__(message)
-        if code is not None:
-            self.code = code
-        if retryable is not None:
-            self.retryable = retryable
-        if recovery_hint is not None:
-            self.recovery_hint = recovery_hint
-
-
-# Vault errors
-class VaultConnectionError(RuneError):
-    code = "VAULT_CONNECTION_ERROR"
-    retryable = True
-    recovery_hint = (
-        "Vault is unreachable. Check: (1) Is the Vault server running? "
-        "(2) Is the endpoint correct in ~/.rune/config.json? "
-        "Run /rune:status for diagnostics."
-    )
-
-
-class VaultDecryptionError(RuneError):
-    code = "VAULT_DECRYPTION_ERROR"
-    retryable = False
-    recovery_hint = (
-        "Vault rejected the decryption request. Check: (1) Is your Vault token valid and not expired? "
-        "(2) Does the token have permission for this team index? "
-        "Run /rune:configure to update credentials."
-    )
-
-
-# envector errors
-class EnvectorConnectionError(RuneError):
-    code = "ENVECTOR_CONNECTION_ERROR"
-    retryable = True
-    recovery_hint = (
-        "Cannot reach enVector Cloud. Check: (1) Network connectivity, "
-        "(2) enVector endpoint in ~/.rune/config.json. "
-        "Run /rune:status for diagnostics."
-    )
-
-
-class EnvectorInsertError(RuneError):
-    code = "ENVECTOR_INSERT_ERROR"
-    retryable = True
-    recovery_hint = (
-        "Failed to store data in enVector. This may be transient — retry in a moment. "
-        "If persistent, check your API key and index permissions via /rune:status."
-    )
-
-
-# Pipeline errors
-class PipelineNotReadyError(RuneError):
-    code = "PIPELINE_NOT_READY"
-    retryable = False
-    recovery_hint = (
-        "Pipelines are not initialized. Run /rune:activate to reinitialize, "
-        "or restart Claude Code if the problem persists."
-    )
-
-
-# Input errors
-class InvalidInputError(RuneError):
-    code = "INVALID_INPUT"
-    retryable = False
-    recovery_hint = "Check input parameters and try again"
-
-
-# Helper functions
-def make_error(exc: Exception) -> dict:
-    """
-    Convert an exception into a structured MCP error
-    """
-    if isinstance(exc, RuneError):
-        result = {
-            "ok": False,
-            "error": {
-                "code": exc.code,
-                "message": str(exc),
-                "retryable": exc.retryable,
-            },
-        }
-        hint = exc.recovery_hint
-        if hint:
-            result["error"]["recovery_hint"] = hint
-        return result
-    # Fallback for unexpected exceptions (out of Rune)
-    return {
-        "ok": False,
-        "error": {
-            "code": "INTERNAL_ERROR",
-            "message": str(exc),
-            "retryable": False,
-        },
-    }
diff --git a/mcp/server/server.py b/mcp/server/server.py
deleted file mode 100644
index 8e78d48..0000000
--- a/mcp/server/server.py
+++ /dev/null
@@ -1,2002 +0,0 @@
-"""
-enVector MCP Server for Rune plugin.
-
-Transport: stdio only (launched by Claude Code plugin system).
-
-Expected MCP Tool Return Format:
-{
-    "ok": bool,
-    "results": Any,          # Present if ok is True
-    "error": str            # Present if ok is False
-}
-"""
-
-import argparse
-import logging
-from typing import Union, List, Dict, Any, Optional, Annotated
-from datetime import datetime, timezone
-import numpy as np
-import os, sys, signal, threading
-import json
-
-logger = logging.getLogger("rune.mcp")
-
-
-class _SensitiveFilter(logging.Filter):
-    """Sanitize potential secrets from log messages."""
-    import re
-    _PATTERNS = [
-        re.compile(r'(sk-|pk-|api_|envector_|evt_)[a-zA-Z0-9_-]{10,}'),
-        re.compile(r'(token|key|secret|password)["\s:=]+[a-zA-Z0-9_-]{20,}', re.IGNORECASE),
-    ]
-
-    def filter(self, record):
-        import re
-        msg = record.getMessage()
-        for pat in self._PATTERNS:
-            msg = pat.sub(lambda m: m.group()[:8] + '***', msg)
-        record.msg = msg
-        record.args = ()
-        return True
-
-
-logger.addFilter(_SensitiveFilter())
-from pydantic import Field
-
-# Add parent directory (rune/mcp/) to sys.path so `from adapter import ...` works
-CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
-MCP_ROOT = os.path.dirname(CURRENT_DIR)
-PLUGIN_ROOT = os.path.dirname(MCP_ROOT)  # rune/ root for `from agents import ...`
-# Re-insert paths to take precedence over the script dir
-for _p in (MCP_ROOT, PLUGIN_ROOT):
-    try:
-        sys.path.remove(_p)
-    except ValueError:
-        pass
-sys.path[0:0] = [PLUGIN_ROOT, MCP_ROOT]
-del _p
-
-from fastmcp import FastMCP, Context  # pip install fastmcp
-from mcp.types import ToolAnnotations
-from adapter import EnVectorSDKAdapter
-from adapter.vault_client import VaultClient, VaultError
-from server.errors import (
-    RuneError, VaultConnectionError, VaultDecryptionError,
-    EnvectorConnectionError, EnvectorInsertError,
-    PipelineNotReadyError, InvalidInputError, make_error,
-)
-
-
-def _detection_from_agent_data(
-    domain: str = "general",
-    confidence: float = 0.0,
-    category: str = "",
-) -> "DetectionResult":
-    """Build DetectionResult from agent-provided metadata.
-
-    In agent-delegated mode the calling agent has already evaluated
-    significance.  We construct a minimal DetectionResult so that
-    RecordBuilder can consume it without running the pattern detector.
-    """
-    from agents.scribe.detector import DetectionResult
-    return DetectionResult(
-        is_significant=True,  # Agent said capture=true
-        confidence=confidence,
-        domain=domain,
-        category=category or domain,
-    )
-
-
-def _embedding_text_for_record(record) -> str:
-    """Select the text to embed in enVector.
-
-    Schema 2.1+: use reusable_insight (dense NL gist).
-    Schema 2.0 fallback: use payload.text (verbose markdown).
-    """
-    from agents.common.schemas.embedding import embedding_text_for_record
-    return embedding_text_for_record(record)
-
-
-def _classify_novelty(
-    max_similarity: float,
-    threshold_novel: float = 0.3,
-    threshold_related: float = 0.7,
-    threshold_near_duplicate: float = 0.95,
-) -> dict:
-    """Classify capture novelty based on similarity to existing memory."""
-    from agents.common.schemas.embedding import classify_novelty
-    return classify_novelty(max_similarity, threshold_novel, threshold_related, threshold_near_duplicate)
-
-
-# ---------- Capture Log ---------- #
-CAPTURE_LOG_PATH = os.path.join(os.path.expanduser("~"), ".rune", "capture_log.jsonl")
-
-
-def _append_capture_log(
-    record_id: str, title: str, domain: str, mode: str,
-    action: str = "captured", novelty_class: str = "", novelty_score: float = 0.0,
-):
-    """Append a capture event to the local JSONL log (atomic, secure permissions)."""
-    try:
-        entry_dict = {
-            "ts": datetime.now(timezone.utc).isoformat(),
-            "action": action,
-            "id": record_id,
-            "title": title,
-            "domain": domain,
-            "mode": mode,
-        }
-        if novelty_class:
-            entry_dict["novelty_class"] = novelty_class
-            entry_dict["novelty_score"] = novelty_score
-        entry = json.dumps(entry_dict, ensure_ascii=False)
-        fd = os.open(CAPTURE_LOG_PATH, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
-        with os.fdopen(fd, "a") as f:
-            f.write(entry + "\n")
-    except Exception as e:
-        logger.debug("Capture log write failed: %s", e)
-
-
-def _read_capture_log(limit: int = 20, domain: str = None, since: str = None) -> list:
-    """Read capture log entries in reverse chronological order."""
-    if not os.path.exists(CAPTURE_LOG_PATH):
-        return []
-    try:
-        with open(CAPTURE_LOG_PATH, "r") as f:
-            lines = f.readlines()
-    except Exception:
-        return []
-
-    entries = []
-    for line in reversed(lines):
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            entry = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if domain and entry.get("domain") != domain:
-            continue
-        if since:
-            entry_ts = entry.get("ts", "")
-            if entry_ts < since:
-                continue
-        entries.append(entry)
-        if len(entries) >= limit:
-            break
-    return entries
-
-
-def _set_dormant_with_reason(reason: str):
-    """Update config.json to dormant state with a reason and timestamp"""
-    config_path = os.path.join(os.path.expanduser("~"), ".rune", "config.json")
-    try:
-        if not os.path.exists(config_path):
-            return
-        with open(config_path) as f:
-            data = json.load(f)
-        if data.get("state") == "dormant" and data.get("dormant_reason") == reason:
-            return  # already set to this reason — no change needed
-        data["state"] = "dormant"
-        data["dormant_reason"] = reason
-        data["dormant_since"] = datetime.now(timezone.utc).isoformat()
-        fd = os.open(config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
-        with os.fdopen(fd, "w") as f:
-            json.dump(data, f, indent=2)
-        logger.warning("Switched to dormant state: %s", reason)
-    except Exception as e:
-        logger.debug("Failed to update config dormant state: %s", e)
-
-
-async def _async_fetch_keys_from_vault(
-    vault_endpoint: str,
-    vault_token: str,
-    key_base_path: str,
-    ca_cert: str = None,
-    tls_disable: bool = False,
-) -> tuple:
-    """
-    Async core: fetches public keys (EncKey, EvalKey) and per-agent metadata
-    DEK from Rune-Vault via gRPC.
-
-    The Vault bundle includes key_id, index_name, agent_id, and agent_dek
-    so the client discovers them dynamically.
-
-    Args:
-        vault_endpoint: Rune-Vault gRPC endpoint
-        vault_token: Authentication token
-        key_base_path: Root key directory (e.g. ~/.rune/keys).
-            Keys are saved under key_base_path/<key_id>/.
-        ca_cert: Path to CA certificate PEM for self-signed certs.
-        tls_disable: If True, use insecure plaintext channel.
-
-    Returns:
-        tuple: (success, index_name, key_id, agent_id, agent_dek_bytes, envector_endpoint, envector_api_key)
-    """
-    client = VaultClient(
-        vault_endpoint=vault_endpoint,
-        vault_token=vault_token,
-        ca_cert=ca_cert,
-        tls_disable=tls_disable,
-    )
-    try:
-        bundle = await client.get_public_key()
-
-        # Extract metadata before saving key files
-        vault_index_name = bundle.pop("index_name", None)
-        vault_key_id = bundle.pop("key_id", None)
-        vault_agent_id = bundle.pop("agent_id", None)
-        vault_agent_dek_b64 = bundle.pop("agent_dek", None)
-        vault_envector_endpoint = bundle.pop("envector_endpoint", None)
-        vault_envector_api_key = bundle.pop("envector_api_key", None)
-
-        if vault_index_name:
-            logger.info(f"Vault provided index_name: {vault_index_name}")
-        if vault_key_id:
-            logger.info(f"Vault provided key_id: {vault_key_id}")
-        else:
-            logger.warning("Vault did not provide key_id — key directory cannot be determined")
-            return False, vault_index_name, None, None, None, None, None
-        if vault_agent_id:
-            logger.info(f"Vault provided agent_id: {vault_agent_id}")
-
-        # Decode agent DEK from base64
-        agent_dek_bytes = None
-        if vault_agent_dek_b64:
-            import base64
-            try:
-                agent_dek_bytes = base64.b64decode(vault_agent_dek_b64)
-            except (base64.binascii.Error, ValueError) as e:
-                logger.error(f"Failed to decode agent_dek from Vault (invalid base64): {e}")
-                return False, vault_index_name, vault_key_id, vault_agent_id, None, None, None
-            if len(agent_dek_bytes) != 32:
-                logger.error(f"agent_dek has invalid length {len(agent_dek_bytes)} bytes (expected 32 for AES-256)")
-                return False, vault_index_name, vault_key_id, vault_agent_id, None, None, None
-
-        # Save keys under key_base_path/<key_id>/ with restrictive permissions
-        key_dir = os.path.join(key_base_path, vault_key_id)
-        os.makedirs(key_dir, mode=0o700, exist_ok=True)
-
-        for filename, key_content in bundle.items():
-            filepath = os.path.join(key_dir, filename)
-            fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
-            with os.fdopen(fd, 'w') as f:
-                f.write(key_content)
-            logger.info(f"Saved {filename} to {filepath}")
-
-        return True, vault_index_name, vault_key_id, vault_agent_id, agent_dek_bytes, vault_envector_endpoint, vault_envector_api_key
-
-    except Exception as e:
-        logger.error(f"Failed to fetch keys from Vault: {e}")
-        return False, None, None, None, None, None, None
-    finally:
-        await client.close()
-
-
-def fetch_keys_from_vault(
-    vault_endpoint: str,
-    vault_token: str,
-    key_base_path: str,
-    ca_cert: str = None,
-    tls_disable: bool = False,
-) -> tuple:
-    """
-    Fetches public keys from Rune-Vault. Safe to call from both sync (main)
-    and async (reload_pipelines) contexts.
-
-    Args:
-        vault_endpoint: Rune-Vault endpoint URL
-        vault_token: Authentication token for Vault
-        key_base_path: Root key directory (e.g. ~/.rune/keys)
-        ca_cert: Path to CA certificate PEM for self-signed certs.
-        tls_disable: If True, use insecure plaintext channel.
-
-    Returns:
-        tuple: (success, index_name, key_id, agent_id, agent_dek_bytes, envector_endpoint, envector_api_key)
-    """
-    import asyncio
-    _fail = (False, None, None, None, None, None, None)
-
-    try:
-        asyncio.get_running_loop()
-        # Already inside an event loop (e.g. FastMCP startup) —
-        # run the async fetch in a separate thread with its own loop.
-        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(
-                asyncio.run,
-                _async_fetch_keys_from_vault(vault_endpoint, vault_token, key_base_path, ca_cert, tls_disable),
-            )
-            try:
-                return future.result(timeout=30)
-            except concurrent.futures.TimeoutError:
-                logger.error("Vault key fetch timed out after 30 seconds")
-                return _fail
-            except Exception as e:
-                logger.error(f"Vault key fetch failed in thread: {e}")
-                return _fail
-    except RuntimeError:
-        # No running event loop — safe to use asyncio.run() directly.
-        try:
-            return asyncio.run(
-                _async_fetch_keys_from_vault(vault_endpoint, vault_token, key_base_path, ca_cert, tls_disable)
-            )
-        except Exception as e:
-            logger.error(f"Vault key fetch failed: {e}")
-            return _fail
-
-class MCPServerApp:
-    """
-    Main application class for the MCP server.
-
-    Security Model (with Rune-Vault):
-    - MCP Server handles embeddings, query encryption, and orchestration
-    - Rune-Vault holds secret key and performs all decryption
-    - Agent never has access to secret key
-    """
-    # Canonical key path (key_id is discovered from Vault at runtime)
-    DEFAULT_KEY_PATH = os.path.expanduser("~/.rune/keys")
-
-    def __init__(
-            self,
-            envector_adapter: Optional[EnVectorSDKAdapter] = None,
-            mcp_server_name: str = "envector_mcp_server",
-            vault_client: Optional[VaultClient] = None,
-            vault_index_name: Optional[str] = None,
-            key_path: Optional[str] = None,
-            key_id: Optional[str] = None,
-            agent_id: Optional[str] = None,
-            agent_dek: Optional[bytes] = None,
-            scribe_pipeline: Optional[Dict[str, Any]] = None,
-            retriever_pipeline: Optional[Dict[str, Any]] = None,
-        ) -> None:
-        """
-        Initializes the MCPServerApp with the given adapter and server name.
-        Args:
-            envector_adapter (EnVectorSDKAdapter): The enVector SDK adapter instance.
-            mcp_server_name (str): The name of the MCP server.
-            vault_client (VaultClient): Optional Vault client for secure decryption.
-                Embedding is initialized from config.json via _init_pipelines (no CLI override).
-            vault_index_name (str): Team index name provisioned by Vault admin (optional).
-            key_path (str): Root directory for encryption keys.
-            key_id (str): Key identifier (subdirectory under key_path).
-                Discovered from Vault at runtime; no hardcoded default.
-            agent_id (str): Per-agent identifier for metadata encryption (from Vault).
-            agent_dek (bytes): Per-agent AES-256 DEK for app-layer metadata encryption.
-            scribe_pipeline (dict): Pre-initialized scribe pipeline components.
-            retriever_pipeline (dict): Pre-initialized retriever pipeline components.
-        """
-        # adapters
-        self.envector = envector_adapter
-        self.embedding = None  # set by _init_pipelines from config
-        self.vault = vault_client
-        self._vault_index_name = vault_index_name
-        self._key_path = key_path or self.DEFAULT_KEY_PATH
-        self._key_id = key_id  # Vault-provided, no hardcoded fallback
-        self._agent_id = agent_id
-        self._agent_dek = agent_dek
-        self._scribe = scribe_pipeline
-        self._retriever = retriever_pipeline
-        self._envector_endpoint: Optional[str] = None
-        self._envector_api_key: Optional[str] = None
-        self._client_provider_override: Optional[str] = None
-        self._active_llm_provider: Optional[str] = None
-        self._active_tier2_provider: Optional[str] = None
-        # mcp
-        self.mcp = FastMCP(name=mcp_server_name)
-        # Background pipeline initialization
-        self._pipelines_ready = threading.Event()
-        self._pipelines_error: Optional[str] = None
-
-        # ---------- Confidence Calculation (inlined from Synthesizer) ---------- #
-        def _calculate_confidence(results) -> float:
-            """Calculate overall confidence from search results (pure math, no LLM)."""
-            if not results:
-                return 0.0
-            certainty_weights = {
-                "supported": 1.0,
-                "partially_supported": 0.6,
-                "unknown": 0.3,
-            }
-            total_weight = 0.0
-            total_score = 0.0
-            for i, r in enumerate(results[:5]):
-                position_weight = 1.0 / (i + 1)
-                cert_weight = certainty_weights.get(r.certainty, 0.3)
-                weight = position_weight * cert_weight * r.score
-                total_weight += weight
-                total_score += weight
-            if total_weight == 0:
-                return 0.0
-            return round(min(1.0, total_score / 2.0), 2)
-
-        # ---------- Common Query Preprocessing ---------- #
-        def _preprocess(raw_query: Any) -> Union[List[float], List[List[float]]]:
-            """Convert raw query input (string, ndarray, list) into a valid vector or batch of vectors."""
-            if isinstance(raw_query, str):
-                raw_query = raw_query.strip()
-
-                if self.embedding is not None:
-                    return self.embedding.embed([raw_query])[0]
-
-                if not raw_query:
-                    raise ValueError("`query` string is empty. Provide a JSON array of floats or precomputed embedding.")
-                try:
-                    raw_query = json.loads(raw_query)
-                except json.JSONDecodeError as exc:
-                    raise ValueError(
-                        "Plain text is not supported for `query`. Convert the text into an embedding vector "
-                        "and pass it as a JSON array (e.g., [[0.1, 0.2], ...])."
-                    ) from exc
-
-            if isinstance(raw_query, np.ndarray):
-                raw_query = raw_query.tolist()
-            elif isinstance(raw_query, list) and all(isinstance(q, np.ndarray) for q in raw_query):
-                raw_query = [q.tolist() for q in raw_query]
-
-            def _is_vector(value: Any) -> bool:
-                return isinstance(value, list) and all(isinstance(v, (int, float)) for v in value)
-
-            if _is_vector(raw_query):
-                return raw_query
-            if isinstance(raw_query, list) and all(_is_vector(item) for item in raw_query):
-                return raw_query
-
-            raise ValueError(
-                "`query` must be a list of floats or a list of float lists. "
-                f"Received type: {type(raw_query).__name__}"
-            )
-
-        def _infer_provider_from_context(ctx: Optional[Context]) -> Optional[str]:
-            """
-            Infer LLM provider from MCP initialize clientInfo.name.
-            This is best-effort and only used when config provider is set to "auto".
-            """
-            if ctx is None or ctx.request_context is None:
-                return None
-
-            try:
-                session = getattr(ctx.request_context, "session", None)
-                params = getattr(session, "client_params", None)
-                client_info = getattr(params, "clientInfo", None) or getattr(params, "client_info", None)
-                client_name = (getattr(client_info, "name", "") or "").lower()
-            except Exception:
-                return None
-
-            if not client_name:
-                return None
-            if any(token in client_name for token in ("claude", "anthropic")):
-                return "anthropic"
-            if any(token in client_name for token in ("openai", "codex", "chatgpt")):
-                return "openai"
-            if any(token in client_name for token in ("gemini", "google", "antigravity", "openclaw")):
-                return "google"
-            return None
-
-        def _maybe_reload_for_auto_provider(ctx: Optional[Context]) -> None:
-            inferred = _infer_provider_from_context(ctx)
-            if not inferred:
-                return
-            if inferred == self._client_provider_override:
-                return
-
-            self._client_provider_override = inferred
-            logger.info("Auto provider inferred from MCP clientInfo: %s", inferred)
-            refresh = self._init_pipelines()
-            if refresh.get("errors"):
-                logger.warning("Auto provider reload had warnings: %s", refresh["errors"])
-
-        # ---------- MCP Tools: Vault Health Check ---------- #
-        @self.mcp.tool(
-            name="vault_status",
-            description="Check Rune-Vault connection status and security mode.",
-            annotations=ToolAnnotations(readOnlyHint=True, destructiveHint=False)
-        )
-        async def tool_vault_status() -> Dict[str, Any]:
-            """
-            Returns the current Vault integration status.
-
-            Returns:
-                Dict with Vault connection status and security mode information.
-            """
-            if self.vault is None:
-                return {
-                    "ok": True,
-                    "vault_configured": False,
-                    "secure_search_available": False,
-                    "mode": "standard (no Vault)",
-                    "team_index_name": self._vault_index_name,
-                    "warning": "secret key may be accessible locally. Configure Vault for secure mode."
-                }
-
-            # Check Vault health via /health endpoint
-            try:
-                vault_healthy = await self.vault.health_check()
-                return {
-                    "ok": True,
-                    "vault_configured": True,
-                    "vault_endpoint": getattr(self.vault, 'vault_endpoint', 'unknown'),
-                    "secure_search_available": vault_healthy,
-                    "mode": "secure (Vault-backed)",
-                    "vault_healthy": vault_healthy,
-                    "team_index_name": self._vault_index_name,
-                }
-            except Exception as e:
-                err = make_error(VaultConnectionError(f"Vault health check failed: {e}"))
-                err["vault_configured"] = True
-                return err
-
-        # ---------- MCP Tools: Diagnostics ---------- #
-        @self.mcp.tool(
-            name="diagnostics",
-            description=(
-                "System health check tool for the Rune. "
-                "Reports status of Vault connection, encryption keys, "
-                "pipeline initialization, and enVector cloud reachability."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=True, destructiveHint=False)
-        )
-        async def tool_diagnostics() -> Dict[str, Any]:
-            """
-            Returns diagnostic reports about Rune subsystems"
-
-            Returns:
-                Dict with subsystem health information
-            """
-            import time
-            import sys
-
-            report: Dict[str, Any] = {"ok": True}
-
-            # Environment Info
-            try:
-                report["environment"] = {
-                    "os": sys.platform,
-                    "python_version": sys.version.split(" ")[0],
-                    "cwd": os.getcwd(),
-                }
-            except Exception as e:
-                report["environment"] = {"error": str(e)}
-
-            # Dormant state info
-            config_path = os.path.join(os.path.expanduser("~"), ".rune", "config.json")
-            if os.path.exists(config_path):
-                try:
-                    with open(config_path) as _cf:
-                        _cfg_data = json.load(_cf)
-                    report["state"] = _cfg_data.get("state", "unknown")
-                    if _cfg_data.get("dormant_reason"):
-                        report["dormant_reason"] = _cfg_data["dormant_reason"]
-                    if _cfg_data.get("dormant_since"):
-                        report["dormant_since"] = _cfg_data["dormant_since"]
-                except Exception:
-                    pass
-
-            # Vault connection
-            vault_info: Dict[str, Any] = {
-                "configured": self.vault is not None,
-                "healthy": False,
-                "endpoint": None,
-            }
-
-            if self.vault is not None:
-                vault_info["endpoint"] = getattr(self.vault, "vault_endpoint", "unknown")
-                try:
-                    vault_info["healthy"] = await self.vault.health_check()
-                except Exception as e:
-                    vault_info["healthy"] = False
-                    vault_info["error"] = str(e)
-            report["vault"] = vault_info
-
-            # Encryption Keys
-            key_id = self._key_id
-            enc_key_loaded = False
-            if key_id and self._key_path:
-                enc_key_file = os.path.join(self._key_path, key_id, "EncKey.json")
-                enc_key_loaded = os.path.exists(enc_key_file)
-
-            keys_info: Dict[str, Any] = {
-                "enc_key_loaded": enc_key_loaded,
-                "key_id": key_id,
-                "agent_dek_loaded": self._agent_dek is not None,
-            }
-            report["keys"] = keys_info
-
-            # Pipelines
-            pipelines_info: Dict[str, Any] = {
-                "scribe": self._scribe is not None,
-                "retriever": self._retriever is not None,
-                "llm_provider": self._active_llm_provider,
-            }
-            report["pipelines"] = pipelines_info
-
-            # Embedding model
-            embedding_info: Dict[str, Any] = {
-                "model": None,
-                "mode": None,
-            }
-            if self._scribe and self._scribe.get("embedding_service"):
-                svc = self._scribe["embedding_service"]
-                embedding_info["model"] = getattr(svc, "_model", "unknown")
-                embedding_info["mode"] = getattr(svc, "_mode", "unknown")
-            report["embedding"] = embedding_info
-
-            # enVector Cloud
-            envector_info: Dict[str, Any] = {
-                "reachable": False,
-                "latency_ms": None,
-            }
-
-            if self.envector is not None:
-                import concurrent.futures as _cf
-                ENVECTOR_DIAGNOSIS_TIMEOUT = 5.0  # seconds
-                _pool = _cf.ThreadPoolExecutor(max_workers=1)
-                try:
-                    t0 = time.monotonic()
-                    _future = _pool.submit(self.envector.invoke_get_index_list)
-                    try:
-                        _future.result(timeout=ENVECTOR_DIAGNOSIS_TIMEOUT)
-                        latency = (time.monotonic() - t0) * 1000
-                        envector_info["reachable"] = True
-                        envector_info["latency_ms"] = round(latency, 1)
-                    except _cf.TimeoutError:
-                        elapsed = round((time.monotonic() - t0) * 1000, 1)
-                        envector_info["error"] = (
-                            f"Health check timed out after {ENVECTOR_DIAGNOSIS_TIMEOUT:.0f}s "
-                            f"(elapsed: {elapsed}ms). "
-                            "Run /rune:activate to pre-warm the connection, then retry /rune:status."
-                        )
-                        envector_info["error_type"] = "timeout"
-                        envector_info["elapsed_ms"] = elapsed
-                except Exception as e:
-                    err_str = str(e)
-                    # Classify errors for more hints to users
-                    if "UNAVAILABLE" in err_str or "Connection refused" in err_str:
-                        error_type = "connection_refused"
-                        hint = "Check that the enVector endpoint is correct and reachable from this machine"
-                    elif "UNAUTHENTICATED" in err_str or "401" in err_str:
-                        error_type = "auth_failure"
-                        hint = "enVector API key may be invalid or expired"
-                    elif "DEADLINE_EXCEEDED" in err_str:
-                        error_type = "deadline_exceeded"
-                        hint = (
-                            "The enVector gRPC deadline was exceeded. "
-                            "Run /rune:activate to pre-warm, then retry /rune:status"
-                        )
-                    else:
-                        error_type = "unknown"
-                        hint = "Run /rune:activate to reinitialize the connection, or check network connectivity"
-                    envector_info["error"] = err_str
-                    envector_info["error_type"] = error_type
-                    envector_info["hint"] = hint
-                finally:
-                    # Return immediately without waiting on timeout
-                    _pool.shutdown(wait=False)
-            report["envector"] = envector_info
-
-            # Result
-            if self.vault is not None and not vault_info["healthy"]:
-                report["ok"] = False
-            if not enc_key_loaded:
-                report["ok"] = False
-
-            return report
-
-        # ---------- MCP Tools: Capture (Scribe Pipeline) ---------- #
-        @self.mcp.tool(
-            name="capture",
-            description=(
-                "Capture a significant organizational decision into FHE-encrypted team memory. "
-                "PRIMARY: Agent-delegated mode — pass `extracted` JSON with the agent's own "
-                "evaluation and extraction. The MCP server stores it without additional LLM calls. "
-                "LEGACY: If `extracted` is omitted and API keys are configured, falls back to "
-                "a 3-tier server-side pipeline (pattern detection → LLM filter → LLM extraction)."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=False, destructiveHint=False)
-        )
-        async def tool_capture(
-            text: Annotated[str, Field(description="The text containing a potential decision or significant context to capture")],
-            source: Annotated[str, Field(description="Source of the text (e.g., 'claude_agent', 'slack', 'github')")] = "claude_agent",
-            user: Annotated[Optional[str], Field(description="User who authored the text")] = None,
-            channel: Annotated[Optional[str], Field(description="Channel or location where the text originated")] = None,
-            extracted: Annotated[Optional[str], Field(description="Pre-extracted JSON from calling agent (agent-delegated mode). When provided, Tier 2/3 are skipped.")] = None,
-            ctx: Optional[Context] = None,
-        ) -> Dict[str, Any]:
-            _maybe_reload_for_auto_provider(ctx)
-
-            wait_err = self._ensure_pipelines()
-            if wait_err:
-                return wait_err
-
-            if self._scribe is None:
-                return make_error(PipelineNotReadyError(
-                    "Scribe pipeline not initialized.",
-                    recovery_hint="Run /rune:activate to reinitialize pipelines, or restart Claude Code if the problem persists.",
-                ))
-
-            if not self._vault_index_name:
-                return make_error(PipelineNotReadyError(
-                    "No index name available. Vault must provide a team index name."
-                ))
-
-            try:
-                from datetime import datetime, timezone
-                from agents.scribe.record_builder import RawEvent
-                from agents.scribe.llm_extractor import (
-                    ExtractionResult, ExtractedFields, PhaseExtractedFields,
-                )
-                from agents.common.llm_utils import parse_llm_json
-
-                record_builder = self._scribe["record_builder"]
-                envector_client = self._scribe["envector_client"]
-                embedding_service = self._scribe["embedding_service"]
-                detector = self._scribe.get("detector")
-                tier2_filter = self._scribe.get("tier2_filter")
-
-                # ===== PRIMARY: Agent-delegated mode =====
-                # The calling agent (Claude/Gemini/Codex) has already evaluated and
-                # extracted the decision.  We just validate, build records, and store.
-                if extracted is not None:
-                    return await self._capture_single(
-                        text=text,
-                        source=source,
-                        user=user,
-                        channel=channel,
-                        extracted=extracted,
-                    )
-
-                # ===== FALLBACK: Legacy 3-tier pipeline (requires API keys) =====
-                # Retained for backward compatibility.  New integrations should use
-                # agent-delegated mode above.
-                if detector is None:
-                    return {
-                        "ok": True,
-                        "captured": False,
-                        "reason": "No `extracted` JSON provided and legacy pipeline not available "
-                                  "(no API keys configured). Use agent-delegated mode by passing "
-                                  "the `extracted` parameter.",
-                    }
-                raw_event = RawEvent(
-                    text=text,
-                    user=user or "unknown",
-                    channel=channel or "claude_session",
-                    timestamp=str(datetime.now(timezone.utc).timestamp()),
-                    source=source,
-                )
-                return await self._legacy_standard_capture(
-                    text=text,
-                    raw_event=raw_event,
-                    detector=detector,
-                    tier2_filter=tier2_filter,
-                    record_builder=record_builder,
-                    envector_client=envector_client,
-                    embedding_service=embedding_service,
-                )
-
-            except VaultError as e:
-                logger.error(f"Capture failed (Vault): {e}", exc_info=True)
-                _set_dormant_with_reason("vault_unreachable")
-                return make_error(VaultConnectionError(
-                    str(e),
-                    recovery_hint=(
-                        "Vault error during capture. Check: "
-                        "(1) Is the Vault server running? "
-                        "(2) Is your token valid? "
-                        "Run /rune:status for diagnostics."
-                    ),
-                ))
-            except (ConnectionError, OSError) as e:
-                logger.error(f"Capture failed (network): {e}", exc_info=True)
-                _set_dormant_with_reason("envector_unreachable")
-                return make_error(EnvectorConnectionError(
-                    str(e),
-                    recovery_hint=(
-                        "Network error during capture. Check: "
-                        "(1) Is the enVector endpoint reachable? "
-                        "(2) Is your API key valid? "
-                        "Run /rune:status for diagnostics."
-                    ),
-                ))
-            except ValueError as e:
-                logger.error(f"Capture failed (input): {e}", exc_info=True)
-                return make_error(InvalidInputError(str(e)))
-            except Exception as e:
-                logger.error(f"Capture failed: {e}", exc_info=True)
-                return make_error(e)
-
-        # ---------- MCP Tools: Batch Capture (Session-End Sweep) ---------- #
-        @self.mcp.tool(
-            name="batch_capture",
-            description=(
-                "Batch-capture multiple decisions at once (session-end sweep). "
-                "Each item uses the same format as the `capture` tool's `extracted` parameter. "
-                "Items are processed independently — one failure does not abort others. "
-                "Novelty check runs per item; near-duplicates are skipped."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=False, destructiveHint=False)
-        )
-        async def tool_batch_capture(
-            items: Annotated[str, Field(description="JSON array of extracted decision objects (same format as capture's extracted parameter)")],
-            source: Annotated[str, Field(description="Source (e.g., 'claude_agent')")] = "claude_agent",
-            user: Annotated[Optional[str], Field(description="User who authored the decisions")] = None,
-            channel: Annotated[Optional[str], Field(description="Channel or context")] = None,
-            ctx: Optional[Context] = None,
-        ) -> Dict[str, Any]:
-            _maybe_reload_for_auto_provider(ctx)
-
-            wait_err = self._ensure_pipelines()
-            if wait_err:
-                return wait_err
-
-            if self._scribe is None:
-                return make_error(PipelineNotReadyError("Scribe pipeline not initialized."))
-            if not self._vault_index_name:
-                return make_error(PipelineNotReadyError("No index name available."))
-
-            try:
-                items_list = json.loads(items)
-            except json.JSONDecodeError as e:
-                return {"ok": False, "error": f"Invalid JSON: {e}"}
-
-            if not isinstance(items_list, list):
-                return {"ok": False, "error": "items must be a JSON array"}
-
-            if len(items_list) == 0:
-                return {"ok": True, "total": 0, "results": [], "captured": 0, "skipped": 0, "errors": 0}
-
-            results = []
-            for i, item in enumerate(items_list):
-                title = ""
-                try:
-                    title = item.get("title", "") if isinstance(item, dict) else ""
-                    item_text = item.get("reusable_insight") or item.get("title") or "[batch_capture]" if isinstance(item, dict) else "[batch_capture]"
-                    result = await self._capture_single(
-                        text=item_text,
-                        source=source,
-                        user=user,
-                        channel=channel,
-                        extracted=json.dumps(item),
-                    )
-                    if result.get("captured"):
-                        status = "captured"
-                        novelty_class = result.get("novelty", {}).get("class", "novel")
-                    elif result.get("novelty", {}).get("class") == "near_duplicate":
-                        status = "near_duplicate"
-                        novelty_class = "near_duplicate"
-                    else:
-                        status = "skipped"
-                        novelty_class = result.get("novelty", {}).get("class", "")
-                    results.append({
-                        "index": i,
-                        "title": title,
-                        "status": status,
-                        "novelty": novelty_class,
-                    })
-                except Exception as e:
-                    logger.warning("batch_capture item %d failed: %s", i, e)
-                    results.append({
-                        "index": i,
-                        "title": title,
-                        "status": "error",
-                        "error": str(e),
-                    })
-
-            captured = sum(1 for r in results if r["status"] == "captured")
-            skipped = sum(1 for r in results if r["status"] in ("skipped", "near_duplicate"))
-            errors = sum(1 for r in results if r["status"] == "error")
-
-            return {
-                "ok": True,
-                "total": len(results),
-                "results": results,
-                "captured": captured,
-                "skipped": skipped,
-                "errors": errors,
-            }
-
-        # ---------- MCP Tools: Recall (Retriever Pipeline) ---------- #
-        @self.mcp.tool(
-            name="recall",
-            description=(
-                "Search and synthesize answers from FHE-encrypted team memory via Vault-secured pipeline. "
-                "Pipeline: (1) query expansion and intent detection, "
-                "(2) encrypted similarity scoring on enVector Cloud, "
-                "(3) Rune-Vault decrypts result ciphertext (secret key never leaves Vault). "
-                "Use for questions about past decisions, trade-offs, and organizational knowledge."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=True, destructiveHint=False)
-        )
-        async def tool_recall(
-            query: Annotated[str, Field(description="Natural language question about past decisions or organizational context")],
-            topk: Annotated[int, Field(description="Number of results to consider for synthesis")] = 5,
-            domain: Annotated[Optional[str], Field(description="Filter by domain (e.g. 'architecture', 'security')")] = None,
-            status: Annotated[Optional[str], Field(description="Filter by status (e.g. 'accepted', 'proposed')")] = None,
-            since: Annotated[Optional[str], Field(description="Filter records after this ISO date (e.g. '2026-01-01')")] = None,
-            ctx: Optional[Context] = None,
-        ) -> Dict[str, Any]:
-            _maybe_reload_for_auto_provider(ctx)
-
-            wait_err = self._ensure_pipelines()
-            if wait_err:
-                return wait_err
-
-            if self._retriever is None:
-                return make_error(PipelineNotReadyError(
-                    "Retriever pipeline not initialized.",
-                    recovery_hint="Run /rune:activate to reinitialize pipelines, or restart Claude Code if the problem persists.",
-                ))
-
-            if topk > 10:
-                return make_error(InvalidInputError("topk must be 10 or less."))
-
-            try:
-                query_processor = self._retriever["query_processor"]
-                searcher = self._retriever["searcher"]
-                synthesizer = self._retriever.get("synthesizer")
-
-                # Step 1: Parse query (intent detection, entity extraction, query expansion)
-                parsed_query = query_processor.parse(query)
-
-                # Step 2: Search enVector (over-fetch, post-filter, recency weighting)
-                filters = {}
-                if domain:
-                    filters["domain"] = domain
-                if status:
-                    filters["status"] = status
-                if since:
-                    filters["since"] = since
-                results = await searcher.search(parsed_query, topk=topk, filters=filters or None)
-
-                # Step 3: Return results (agent synthesizes) or use server-side synthesizer
-                # Primary path: raw results for agent-side synthesis (no LLM key needed)
-                if synthesizer is None or not synthesizer.has_llm:
-                    confidence = _calculate_confidence(results)
-                    formatted_results = []
-                    for r in results:
-                        entry = {
-                            "record_id": r.record_id,
-                            "title": r.title,
-                            "content": r.payload_text,
-                            "domain": r.domain,
-                            "certainty": r.certainty,
-                            "score": r.score,
-                        }
-                        if r.group_id:
-                            entry["group_id"] = r.group_id
-                            entry["group_type"] = r.group_type
-                            entry["phase_seq"] = r.phase_seq
-                            entry["phase_total"] = r.phase_total
-                        formatted_results.append(entry)
-
-                    sources = [
-                        {
-                            "record_id": r.record_id,
-                            "title": r.title,
-                            "domain": r.domain,
-                            "certainty": r.certainty,
-                            "score": r.score,
-                        }
-                        for r in results[:5]
-                    ]
-
-                    return {
-                        "ok": True,
-                        "found": len(results),
-                        "results": formatted_results,
-                        "confidence": confidence,
-                        "sources": sources,
-                        "synthesized": False,
-                    }
-
-                # Fallback: server-side synthesis when LLM key is available
-                answer = synthesizer.synthesize(parsed_query, results)
-                return {
-                    "ok": True,
-                    "found": len(results),
-                    "answer": answer.answer,
-                    "confidence": answer.confidence,
-                    "sources": answer.sources,
-                    "warnings": answer.warnings if answer.warnings else None,
-                    "related_queries": answer.related_queries if answer.related_queries else None,
-                    "synthesized": True,
-                }
-
-            except VaultError as e:
-                logger.error(f"Recall failed (Vault): {e}", exc_info=True)
-                _set_dormant_with_reason("vault_unreachable")
-                return make_error(VaultDecryptionError(
-                    str(e),
-                    recovery_hint=(
-                        "Vault decryption failed during recall. Check: "
-                        "(1) Is your Vault token valid? "
-                        "(2) Does the token have permission for this team index? "
-                        "Run /rune:status for diagnostics or /rune:configure to update credentials."
-                    ),
-                ))
-            except (ConnectionError, OSError) as e:
-                logger.error(f"Recall failed (network): {e}", exc_info=True)
-                _set_dormant_with_reason("envector_unreachable")
-                return make_error(EnvectorConnectionError(
-                    str(e),
-                    recovery_hint=(
-                        "Network error during recall. Check: "
-                        "(1) Is the enVector endpoint reachable? "
-                        "(2) Is your API key still valid? "
-                        "Run /rune:status for diagnostics."
-                    ),
-                ))
-            except ValueError as e:
-                logger.error(f"Recall failed (input): {e}", exc_info=True)
-                return make_error(InvalidInputError(str(e)))
-            except Exception as e:
-                logger.error(f"Recall failed: {e}", exc_info=True)
-                return make_error(e)
-
-        # ---------- MCP Tools: Reload Pipelines ---------- #
-        @self.mcp.tool(
-            name="reload_pipelines",
-            description=(
-                "Re-read ~/.rune/config.json and reinitialize scribe/retriever pipelines. "
-                "Call this after the Rune activate command changes state to 'active' "
-                "to avoid restarting the current agent session."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=False, destructiveHint=False)
-        )
-        async def tool_reload_pipelines() -> Dict[str, Any]:
-            self._pipelines_ready.wait()  # wait for background init to finish first
-            self._pipelines_error = None  # clear stale error before reload
-            result = self._init_pipelines()
-
-            # Pre-warm the enVector connection (blocking) immediately after pipeline init
-            #
-            # Prevent subsequent '/rune:status' diagnostics check is timed out during RegisterKey and
-            # reported enVector as "unreachable"
-            envector_warmup: Dict[str, Any] = {}
-            if result["scribe"] and self.envector is not None:
-                import time as _time
-                import concurrent.futures as _cf
-                WARMUP_TIMEOUT = 60.0  # seconds; RegisterKey can take tens of seconds
-                _pool = _cf.ThreadPoolExecutor(max_workers=1)
-                try:
-                    _t0 = _time.monotonic()
-                    _future = _pool.submit(self.envector.invoke_get_index_list)
-                    _future.result(timeout=WARMUP_TIMEOUT)
-                    envector_warmup = {
-                        "ok": True,
-                        "latency_ms": round((_time.monotonic() - _t0) * 1000, 1),
-                    }
-                    logger.info("enVector pre-warm completed in %.0fms", envector_warmup["latency_ms"])
-                except _cf.TimeoutError:
-                    envector_warmup = {
-                        "ok": False,
-                        "error": f"Pre-warm timed out after {WARMUP_TIMEOUT:.0f}s",
-                    }
-                    logger.warning("enVector pre-warm timed out after %.0fs", WARMUP_TIMEOUT)
-                except Exception as _e:
-                    envector_warmup = {"ok": False, "error": str(_e)}
-                    logger.warning("enVector pre-warm failed: %s", _e)
-                finally:
-                    _pool.shutdown(wait=False)
-
-            return {
-                "ok": not result["errors"],
-                "state": result["state"],
-                "scribe_initialized": result["scribe"],
-                "retriever_initialized": result["retriever"],
-                "errors": result["errors"] if result["errors"] else None,
-                "envector_warmup": envector_warmup or None,
-            }
-
-        # ---------- MCP Tools: Capture History ---------- #
-        @self.mcp.tool(
-            name="capture_history",
-            description=(
-                "View recent capture history from the local log. "
-                "Returns captured decision records in reverse chronological order. "
-                "Use to check what has been captured, verify captures, or find record IDs for deletion."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=True, destructiveHint=False)
-        )
-        async def tool_capture_history(
-            limit: Annotated[int, Field(description="Number of recent captures to return")] = 20,
-            domain: Annotated[Optional[str], Field(description="Filter by domain (e.g. 'architecture', 'security')")] = None,
-            since: Annotated[Optional[str], Field(description="Filter captures after this ISO date (e.g. '2026-03-01')")] = None,
-        ) -> Dict[str, Any]:
-            entries = _read_capture_log(limit=min(limit, 100), domain=domain, since=since)
-            return {
-                "ok": True,
-                "count": len(entries),
-                "entries": entries,
-            }
-
-        # ---------- MCP Tools: Delete Capture (Soft-Delete) ---------- #
-        @self.mcp.tool(
-            name="delete_capture",
-            description=(
-                "Soft-delete a captured decision record by marking its status as 'reverted'. "
-                "The record remains in storage but is heavily demoted in search results (0.3x score). "
-                "Use capture_history to find record IDs."
-            ),
-            annotations=ToolAnnotations(readOnlyHint=False, destructiveHint=True)
-        )
-        async def tool_delete_capture(
-            record_id: Annotated[str, Field(description="The record ID to soft-delete (e.g. dec_20260316_arch_abc)")],
-        ) -> Dict[str, Any]:
-            wait_err = self._ensure_pipelines()
-            if wait_err:
-                return wait_err
-
-            if self._retriever is None or self._scribe is None:
-                return make_error(PipelineNotReadyError(
-                    "Pipelines not initialized.",
-                    recovery_hint="Run /rune:activate to reinitialize pipelines, or restart Claude Code if the problem persists.",
-                ))
-
-            try:
-                searcher = self._retriever["searcher"]
-
-                # Search for the record by ID
-                from agents.retriever.query_processor import ParsedQuery, TimeScope
-                target = await searcher.search_by_id(record_id)
-                if not target:
-                    return make_error(InvalidInputError(
-                        f"Record '{record_id}' not found in search results. "
-                        "Use capture_history to find valid record IDs."
-                    ))
-
-                # Update status to reverted in metadata
-                metadata = target.metadata
-                metadata["status"] = "reverted"
-
-                # Re-insert with updated metadata
-                envector_client = self._scribe["envector_client"]
-                embedding_service = self._scribe["embedding_service"]
-
-                # Use reusable_insight for embedding if available (schema 2.1+)
-                ri = metadata.get("reusable_insight", "")
-                embedding_text = ri.strip() if ri and ri.strip() else target.payload_text
-                insert_result = envector_client.insert_with_text(
-                    index_name=self._vault_index_name,
-                    texts=[embedding_text],
-                    embedding_service=embedding_service,
-                    metadata=[metadata],
-                )
-
-                if not insert_result.get("ok"):
-                    return make_error(EnvectorInsertError(
-                        f"Re-insert failed: {insert_result.get('error')}"
-                    ))
-
-                _append_capture_log(record_id, target.title, target.domain, "soft-delete", action="deleted")
-                return {
-                    "ok": True,
-                    "deleted": True,
-                    "record_id": record_id,
-                    "title": target.title,
-                    "method": "soft-delete (status=reverted)",
-                }
-
-            except VaultError as e:
-                logger.error(f"Delete failed (Vault): {e}", exc_info=True)
-                _set_dormant_with_reason("vault_unreachable")
-                return make_error(VaultConnectionError(
-                    str(e),
-                    recovery_hint=(
-                        "Vault error during delete. Check: "
-                        "(1) Is the Vault server running? "
-                        "(2) Is your token valid? "
-                        "Run /rune:status for diagnostics."
-                    ),
-                ))
-            except (ConnectionError, OSError) as e:
-                logger.error(f"Delete failed (network): {e}", exc_info=True)
-                _set_dormant_with_reason("envector_unreachable")
-                return make_error(EnvectorConnectionError(
-                    str(e),
-                    recovery_hint=(
-                        "Network error during delete. Check: "
-                        "(1) Is the enVector endpoint reachable? "
-                        "(2) Is your API key valid? "
-                        "Run /rune:status for diagnostics."
-                    ),
-                ))
-            except Exception as e:
-                logger.error(f"Delete failed: {e}", exc_info=True)
-                return make_error(e)
-
-    async def _capture_single(
-        self,
-        text: str,
-        source: str,
-        user: Optional[str],
-        channel: Optional[str],
-        extracted: str,
-    ) -> Dict[str, Any]:
-        """Execute a single agent-delegated capture.
-
-        Extracted from tool_capture() so it can be reused by batch_capture
-        and session-end sweep without duplicating logic.
-
-        The caller is responsible for error handling (try/except); this
-        method raises on failure rather than returning error dicts.
-        """
-        from datetime import datetime, timezone
-        from agents.scribe.record_builder import RawEvent
-        from agents.scribe.llm_extractor import (
-            ExtractionResult, ExtractedFields, PhaseExtractedFields,
-        )
-        from agents.common.llm_utils import parse_llm_json
-
-        if self._scribe is None:
-            return {"ok": False, "error": "Scribe pipeline not initialized."}
-        if not self._vault_index_name:
-            return {"ok": False, "error": "No index name available."}
-
-        record_builder = self._scribe["record_builder"]
-        envector_client = self._scribe["envector_client"]
-        embedding_service = self._scribe["embedding_service"]
-
-        data = parse_llm_json(extracted)
-        if not data:
-            return {"ok": False, "error": "Invalid extracted JSON — could not parse."}
-
-        # Tier 2 check: agent already evaluated
-        tier2 = data.get("tier2", {})
-        if not tier2.get("capture", True):
-            return {
-                "ok": True,
-                "captured": False,
-                "reason": f"Agent rejected: {tier2.get('reason', 'no reason')}",
-            }
-
-        # Domain from agent's tier2 evaluation
-        agent_domain = tier2.get("domain", "general")
-
-        # Parse confidence from agent JSON
-        agent_confidence = data.get("confidence")
-        if isinstance(agent_confidence, (int, float)):
-            agent_confidence = max(0.0, min(1.0, float(agent_confidence)))
-        else:
-            agent_confidence = None
-
-        # Build detection from agent data — no detector needed
-        detection = _detection_from_agent_data(
-            domain=agent_domain,
-            confidence=float(agent_confidence) if agent_confidence is not None else 0.0,
-        )
-
-        # Build ExtractionResult from agent JSON
-
-        phases_data = data.get("phases")
-        if phases_data and len(phases_data) > 1:
-            # Multi-phase or bundle
-            phases = []
-            for p in phases_data[:7]:
-                phases.append(PhaseExtractedFields(
-                    phase_title=str(p.get("phase_title", ""))[:60],
-                    phase_decision=str(p.get("phase_decision", "")),
-                    phase_rationale=str(p.get("phase_rationale", "")),
-                    phase_problem=str(p.get("phase_problem", "")),
-                    alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                    tags=[str(t).lower() for t in p.get("tags", []) if t],
-                ))
-            pre_extraction = ExtractionResult(
-                group_title=str(data.get("group_title", ""))[:60],
-                group_type=str(data.get("group_type", "phase_chain")),
-                group_summary=str(data.get("reusable_insight", "") or data.get("group_title", "")),
-                status_hint=str(data.get("status_hint", "")).lower(),
-                tags=[str(t).lower() for t in data.get("tags", []) if t],
-                confidence=agent_confidence,
-                phases=phases,
-            )
-        else:
-            # Single record (may have phases with 0-1 entries, or flat fields)
-            if phases_data and len(phases_data) == 1:
-                p = phases_data[0]
-                single = ExtractedFields(
-                    title=str(p.get("phase_title", data.get("title", "")))[:60],
-                    rationale=str(p.get("phase_rationale", data.get("rationale", ""))),
-                    problem=str(p.get("phase_problem", data.get("problem", ""))),
-                    alternatives=[str(a) for a in p.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in p.get("trade_offs", []) if t],
-                    status_hint=str(data.get("status_hint", "")).lower(),
-                    tags=[str(t).lower() for t in p.get("tags", data.get("tags", [])) if t],
-                )
-            else:
-                single = ExtractedFields(
-                    title=str(data.get("title", ""))[:60],
-                    rationale=str(data.get("rationale", "")),
-                    problem=str(data.get("problem", "")),
-                    alternatives=[str(a) for a in data.get("alternatives", []) if a],
-                    trade_offs=[str(t) for t in data.get("trade_offs", []) if t],
-                    status_hint=str(data.get("status_hint", "")).lower(),
-                    tags=[str(t).lower() for t in data.get("tags", []) if t],
-                )
-            pre_extraction = ExtractionResult(
-                group_title=single.title,
-                group_summary=str(data.get("reusable_insight", "")) or "",
-                status_hint=single.status_hint,
-                tags=single.tags,
-                confidence=agent_confidence,
-                single=single,
-            )
-
-        raw_event = RawEvent(
-            text=text,
-            user=user or "unknown",
-            channel=channel or "claude_session",
-            timestamp=str(datetime.now(timezone.utc).timestamp()),
-            source=source,
-        )
-        records = record_builder.build_phases(raw_event, detection, pre_extraction=pre_extraction)
-
-        # ===== Novelty check (Memory-as-Filter) =====
-        # Vault-secured: embed → score → Vault decrypt → compare max similarity
-        embedding_text = _embedding_text_for_record(records[0])
-        novelty_info = {"score": 1.0, "class": "novel", "related": []}
-
-        try:
-            query_vector = embedding_service.embed_single(embedding_text)
-            scoring_result = envector_client.score(self._vault_index_name, query_vector)
-            if scoring_result.get("ok") and scoring_result.get("encrypted_blobs") and self.vault:
-                blobs = scoring_result["encrypted_blobs"]
-                vault_result = await self.vault.decrypt_search_results(
-                    encrypted_blob_b64=blobs[0],
-                    top_k=3,
-                )
-                if vault_result.ok and vault_result.results:
-                    parsed = vault_result.results
-                    max_sim = max(r.get("score", 0.0) for r in parsed)
-                    novelty_info = _classify_novelty(max_sim)
-                    novelty_info["related"] = [
-                        {
-                            "id": r.get("metadata", {}).get("id", ""),
-                            "title": r.get("metadata", {}).get("title", ""),
-                            "similarity": round(r.get("score", 0.0), 3),
-                        }
-                        for r in parsed[:3]
-                    ]
-
-                    # NEAR-DUPLICATE -> skip capture (only blocking case)
-                    if novelty_info["class"] == "near_duplicate":
-                        return {
-                            "ok": True,
-                            "captured": False,
-                            "reason": "Near-duplicate — virtually identical insight already stored",
-                            "novelty": novelty_info,
-                        }
-        except Exception as e:
-            # Novelty check failure is non-fatal — proceed with capture
-            logger.warning("Novelty check failed (non-fatal): %s", e)
-
-        # Embed reusable_insight (schema 2.1) or payload.text (fallback)
-        texts = [_embedding_text_for_record(r) for r in records]
-        metadata = [r.model_dump(mode="json") for r in records]
-        insert_result = envector_client.insert_with_text(
-            index_name=self._vault_index_name,
-            texts=texts,
-            embedding_service=embedding_service,
-            metadata=metadata,
-        )
-
-        if not insert_result.get("ok"):
-            return {"ok": False, "error": f"Insert failed: {insert_result.get('error')}"}
-
-        first = records[0]
-        result = {
-            "ok": True,
-            "captured": True,
-            "record_id": first.id,
-            "summary": first.title,
-            "domain": first.domain.value,
-            "certainty": first.why.certainty.value,
-            "mode": "agent-delegated",
-            "novelty": novelty_info,
-        }
-        if len(records) > 1:
-            result["record_count"] = len(records)
-            result["group_id"] = first.group_id
-            result["group_type"] = first.group_type or "phase_chain"
-        _append_capture_log(
-            first.id, first.title, first.domain.value, "agent-delegated",
-            novelty_class=novelty_info.get("class", ""),
-            novelty_score=novelty_info.get("score", 0.0),
-        )
-        return result
-
-    async def _legacy_standard_capture(
-        self,
-        text: str,
-        raw_event,
-        detector,
-        tier2_filter,
-        record_builder,
-        envector_client,
-        embedding_service,
-    ) -> Dict[str, Any]:
-        """Standard 3-tier capture pipeline (legacy).
-
-        Requires API keys for Tier 2 (LLM filter) and Tier 3 (LLM extraction).
-        Retained for backward compatibility with deployments that have
-        ANTHROPIC_API_KEY configured and prefer server-side evaluation.
-
-        Most deployments should use agent-delegated mode instead — pass
-        the ``extracted`` parameter to let the calling agent handle
-        evaluation and extraction.
-        """
-        # Tier 1: Embedding similarity detection (0 LLM tokens)
-        detection = detector.detect(text)
-        if not detection.is_significant:
-            return {
-                "ok": True,
-                "captured": False,
-                "reason": f"Not significant (confidence: {detection.confidence:.2f}, threshold: {detector.threshold})",
-            }
-
-        # Tier 2: LLM policy filter (~200 tokens)
-        if tier2_filter and tier2_filter.is_available:
-            filter_result = tier2_filter.evaluate(
-                text,
-                tier1_score=detection.confidence,
-                tier1_pattern=detection.matched_pattern or "",
-            )
-            if not filter_result.should_capture:
-                return {
-                    "ok": True,
-                    "captured": False,
-                    "reason": f"Tier 2 rejected: {filter_result.reason}",
-                }
-            # Update domain from Tier 2 if available
-            if filter_result.domain and filter_result.domain != "general":
-                from dataclasses import replace
-                detection = replace(detection, domain=filter_result.domain)
-
-        # Tier 3: Structured extraction + record building (~500 tokens)
-        records = record_builder.build_phases(raw_event, detection)
-
-        # Store in enVector with FHE encryption
-        texts = [_embedding_text_for_record(r) for r in records]
-        metadata = [r.model_dump(mode="json") for r in records]
-        insert_result = envector_client.insert_with_text(
-            index_name=self._vault_index_name,
-            texts=texts,
-            embedding_service=embedding_service,
-            metadata=metadata,
-        )
-
-        if not insert_result.get("ok"):
-            return {"ok": False, "error": f"Insert failed: {insert_result.get('error')}"}
-
-        first = records[0]
-        result = {
-            "ok": True,
-            "captured": True,
-            "record_id": first.id,
-            "summary": first.title,
-            "domain": first.domain.value,
-            "certainty": first.why.certainty.value,
-        }
-        if len(records) > 1:
-            result["record_count"] = len(records)
-            result["group_id"] = first.group_id
-            result["group_type"] = first.group_type or "phase_chain"
-        _append_capture_log(first.id, first.title, first.domain.value, "standard")
-        return result
-
-    def _init_pipelines_background(self) -> None:
-        """Run _init_pipelines in background, then signal readiness."""
-        try:
-            result = self._init_pipelines()
-            if result["errors"]:
-                self._pipelines_error = "; ".join(
-                    e if isinstance(e, str) else e.get("message", str(e))
-                    for e in result["errors"]
-                )
-        except Exception as e:
-            self._pipelines_error = str(e)
-            logger.error("Background pipeline init failed: %s", e, exc_info=True)
-        finally:
-            self._pipelines_ready.set()
-
-    def _ensure_pipelines(self, timeout: float = 120.0) -> Optional[Dict[str, Any]]:
-        """Wait for background pipeline init. Returns error dict if not ready, None if ok."""
-        if not self._pipelines_ready.is_set():
-            logger.info("Waiting for pipeline initialization to complete...")
-            ready = self._pipelines_ready.wait(timeout=timeout)
-            if not ready:
-                return make_error(PipelineNotReadyError(
-                    "Pipeline initialization still in progress. Please retry shortly.",
-                    recovery_hint="The embedding model may still be downloading. Try again in a few seconds.",
-                ))
-        if self._pipelines_error:
-            return make_error(PipelineNotReadyError(
-                f"Pipeline initialization failed: {self._pipelines_error}",
-                recovery_hint="Run /rune:activate or restart Claude Code.",
-            ))
-        return None
-
-    def _init_pipelines(self) -> Dict[str, Any]:
-        """
-        (Re-)initialize scribe and retriever pipelines by reading fresh config.
-        Called at startup from main() and at runtime from reload_pipelines tool.
-        """
-        result = {"scribe": False, "retriever": False, "state": "unknown", "errors": []}
-
-        try:
-            from agents.common.config import load_config as load_rune_config
-            from agents.common.embedding_service import EmbeddingService
-            from agents.common.envector_client import EnVectorClient
-            from agents.common.pattern_cache import PatternCache
-            from agents.scribe.pattern_parser import load_all_language_patterns
-            from agents.scribe.detector import DecisionDetector
-            from agents.scribe.tier2_filter import Tier2Filter
-            from agents.scribe.llm_extractor import LLMExtractor
-            from agents.scribe.record_builder import RecordBuilder
-            from agents.retriever.query_processor import QueryProcessor
-            from agents.retriever.searcher import Searcher
-            from agents.retriever.synthesizer import Synthesizer
-
-            rune_config = load_rune_config()
-            result["state"] = rune_config.state
-
-            if rune_config.state != "active":
-                self._scribe = None
-                self._retriever = None
-                return result
-
-            embedding_svc = EmbeddingService(
-                mode=rune_config.embedding.mode,
-                model=rune_config.embedding.model,
-            )
-
-            # Resolve key_id: prefer Vault-provided, then instance, then fetch from Vault
-            key_path = self._key_path
-            key_id = self._key_id
-
-            # Always re-fetch from Vault on reload to pick up endpoint/index changes
-            if rune_config.vault.endpoint and rune_config.vault.token:
-                logger.info("Fetching keys from Vault...")
-                success, vault_index, vault_key_id, vault_agent_id, vault_agent_dek, vault_ev_endpoint, vault_ev_api_key = fetch_keys_from_vault(
-                    rune_config.vault.endpoint,
-                    rune_config.vault.token,
-                    key_path,
-                    ca_cert=rune_config.vault.ca_cert or None,
-                    tls_disable=rune_config.vault.tls_disable,
-                )
-                if success and vault_key_id:
-                    key_id = vault_key_id
-                    self._key_id = key_id
-                    logger.info(f"Vault provided key_id: {key_id}")
-                    if vault_index:
-                        self._vault_index_name = vault_index
-                    if vault_agent_id:
-                        self._agent_id = vault_agent_id
-                    if vault_agent_dek:
-                        self._agent_dek = vault_agent_dek
-                    if vault_ev_endpoint:
-                        self._envector_endpoint = vault_ev_endpoint
-                    if vault_ev_api_key:
-                        self._envector_api_key = vault_ev_api_key
-
-                    # Cache enVector credentials to config.json
-                    if vault_ev_endpoint or vault_ev_api_key:
-                        from agents.common.config import save_config as save_rune_config
-                        if vault_ev_endpoint:
-                            rune_config.envector.endpoint = vault_ev_endpoint
-                        if vault_ev_api_key:
-                            rune_config.envector.api_key = vault_ev_api_key
-                        save_rune_config(rune_config)
-                        logger.info("Cached enVector credentials to config.json")
-                else:
-                    result["errors"].append("Failed to fetch keys from Vault")
-                    logger.error("Failed to fetch keys from Vault — capture/search will fail")
-                    _set_dormant_with_reason("vault_unreachable")
-
-            # Use cached enVector credentials from config if not set by Vault
-            if not self._envector_endpoint and rune_config.envector.endpoint:
-                self._envector_endpoint = rune_config.envector.endpoint
-            if not self._envector_api_key and rune_config.envector.api_key:
-                self._envector_api_key = rune_config.envector.api_key
-
-            if not key_id:
-                result["errors"].append("key_id not available. Vault must provide key_id.")
-                logger.error("key_id unknown — aborting pipeline init")
-                return result
-
-            key_dir = os.path.join(key_path, key_id)
-            enc_key_path = os.path.join(key_dir, "EncKey.json")
-
-            # Early return if EncKey still missing after fetch attempt
-            if not os.path.exists(enc_key_path):
-                result["errors"].append(
-                    f"EncKey.json not found at {enc_key_path}. "
-                    "Cannot initialize pipelines without encryption keys."
-                )
-                logger.error(f"EncKey.json missing at {enc_key_path} — aborting pipeline init")
-                return result
-
-            envector_client = EnVectorClient(
-                address=self._envector_endpoint or "",
-                key_path=key_path,
-                key_id=key_id,
-                access_token=self._envector_api_key or "",
-                auto_key_setup=False,
-                agent_id=self._agent_id,
-                agent_dek=self._agent_dek,
-            )
-
-            # Refresh 'self.envector' to reflect updated endpoint/API key from Vault
-            try:
-                self.envector = EnVectorSDKAdapter(
-                    address=self._envector_endpoint or "",
-                    key_id=key_id,
-                    key_path=key_path,
-                    eval_mode="rmp",
-                    query_encryption=False,
-                    access_token=self._envector_api_key or "",
-                    auto_key_setup=False,
-                    agent_id=self._agent_id,
-                    agent_dek=self._agent_dek,
-                )
-            except Exception as e:
-                logger.warning("enVector adapter refresh failed: %s", e)
-
-            llm_cfg = rune_config.llm
-            configured_llm_provider = (llm_cfg.provider or os.getenv("RUNE_LLM_PROVIDER", "anthropic")).lower()
-            configured_tier2_provider = (llm_cfg.tier2_provider or os.getenv("RUNE_TIER2_LLM_PROVIDER", configured_llm_provider)).lower()
-            anthropic_key = llm_cfg.anthropic_api_key or os.getenv("ANTHROPIC_API_KEY", "")
-            openai_key = llm_cfg.openai_api_key or os.getenv("OPENAI_API_KEY", "")
-            google_key = llm_cfg.google_api_key or os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY") or ""
-
-            def _resolve_provider(configured: str, fallback: str) -> str:
-                if configured == "auto":
-                    if self._client_provider_override in ("anthropic", "openai", "google"):
-                        return self._client_provider_override
-                    env_auto = os.getenv("RUNE_AUTO_LLM_PROVIDER", "").lower()
-                    if env_auto in ("anthropic", "openai", "google"):
-                        return env_auto
-                    return fallback
-                if configured in ("anthropic", "openai", "google"):
-                    return configured
-                return fallback
-
-            llm_provider = _resolve_provider(configured_llm_provider, "anthropic")
-            tier2_provider = _resolve_provider(configured_tier2_provider, llm_provider)
-            self._active_llm_provider = llm_provider
-            self._active_tier2_provider = tier2_provider
-
-            def _provider_key(provider: str) -> str:
-                if provider == "openai":
-                    return openai_key
-                if provider == "google":
-                    return google_key
-                return anthropic_key
-
-            def _provider_model(provider: str, role: str) -> str:
-                if provider == "openai":
-                    if role == "tier2" and llm_cfg.openai_tier2_model:
-                        return llm_cfg.openai_tier2_model
-                    return llm_cfg.openai_model
-                if provider == "google":
-                    if role == "tier2" and llm_cfg.google_tier2_model:
-                        return llm_cfg.google_tier2_model
-                    return llm_cfg.google_model
-                if role == "tier2":
-                    return rune_config.scribe.tier2_model
-                return llm_cfg.anthropic_model
-
-            # Phase 1: Core infrastructure (always needed)
-            has_llm_key = bool(_provider_key(llm_provider))
-
-            llm_extractor = None
-            if has_llm_key:
-                llm_extractor = LLMExtractor(
-                    llm_provider=llm_provider,
-                    anthropic_api_key=anthropic_key,
-                    openai_api_key=openai_key,
-                    google_api_key=google_key,
-                    model=_provider_model(llm_provider, "extract"),
-                )
-            record_builder = RecordBuilder(llm_extractor=llm_extractor)
-
-            # Phase 2: Legacy pipeline components (only if API keys present)
-            detector = None
-            tier2_filter = None
-            if has_llm_key:
-                pattern_cache = PatternCache(embedding_svc)
-                patterns = load_all_language_patterns()
-                loaded = pattern_cache.load_patterns(patterns)
-                logger.info(f"Scribe Tier 1: loaded {loaded} patterns into cache")
-
-                detector = DecisionDetector(
-                    pattern_cache,
-                    threshold=rune_config.scribe.similarity_threshold,
-                    high_confidence_threshold=rune_config.scribe.auto_capture_threshold,
-                )
-
-                if rune_config.scribe.tier2_enabled and _provider_key(tier2_provider):
-                    tier2_filter = Tier2Filter(
-                        llm_provider=tier2_provider,
-                        anthropic_api_key=anthropic_key,
-                        openai_api_key=openai_key,
-                        google_api_key=google_key,
-                        model=_provider_model(tier2_provider, "tier2"),
-                    )
-
-            self._scribe = {
-                "record_builder": record_builder,
-                "envector_client": envector_client,
-                "embedding_service": embedding_svc,
-                # Legacy pipeline components (None if no API keys)
-                "detector": detector,
-                "tier2_filter": tier2_filter,
-            }
-            # Unify embedding: pipeline's EmbeddingService is the single source
-            self.embedding = embedding_svc
-            result["scribe"] = True
-            if has_llm_key:
-                logger.info("Scribe pipeline initialized (server-side Tier 2/3)")
-            else:
-                logger.info("Scribe pipeline initialized (agent-delegated mode — no LLM API key)")
-
-            # Retriever pipeline
-            if not self._vault_index_name:
-                result["errors"].append("Vault index name not available — retriever pipeline skipped.")
-                logger.warning("No vault index name — skipping retriever pipeline init")
-            else:
-                query_processor = QueryProcessor(
-                    llm_provider=llm_provider,
-                    anthropic_api_key=anthropic_key,
-                    openai_api_key=openai_key,
-                    google_api_key=google_key,
-                    model=_provider_model(llm_provider, "query"),
-                )
-                searcher = Searcher(envector_client, embedding_svc, self._vault_index_name, vault_client=self.vault)
-
-                synthesizer = None
-                if has_llm_key:
-                    synthesizer = Synthesizer(
-                        llm_provider=llm_provider,
-                        anthropic_api_key=anthropic_key,
-                        openai_api_key=openai_key,
-                        google_api_key=google_key,
-                        model=_provider_model(llm_provider, "query"),
-                    )
-
-                self._retriever = {
-                    "query_processor": query_processor,
-                    "searcher": searcher,
-                    "synthesizer": synthesizer,
-                }
-                result["retriever"] = True
-                if has_llm_key:
-                    logger.info("Retriever pipeline initialized (server-side synthesis)")
-                else:
-                    logger.info("Retriever pipeline initialized (agent-delegated mode — raw results returned)")
-
-        except VaultError as e:
-            result["errors"].append({
-                "code": "VAULT_CONNECTION_ERROR",
-                "message": str(e),
-                "retryable": True,
-                "recovery_hint": "Vault connection failed during pipeline initialization. Check Vault endpoint and token via /rune:status.",
-            })
-            _set_dormant_with_reason("vault_unreachable")
-            logger.warning(f"Pipeline init failed (Vault): {e}")
-        except Exception as e:
-            result["errors"].append({
-                "code": "INTERNAL_ERROR",
-                "message": str(e),
-                "retryable": False,
-                "recovery_hint": "Unexpected error during pipeline initialization. Try /rune:activate or restart Claude Code.",
-            })
-            _set_dormant_with_reason("pipeline_init_failed")
-            logger.warning(f"Pipeline init failed: {e}")
-
-        return result
-
-    def run(self) -> None:
-        """Runs the MCP server using stdio transport."""
-        self.mcp.run(transport="stdio")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the enVector MCP server (stdio).")
-    parser.add_argument(
-        "--mode", default="stdio", help=argparse.SUPPRESS,  # kept for backwards compat
-    )
-    parser.add_argument(
-        "--server-name",
-        default=os.getenv("MCP_SERVER_NAME", "envector_mcp_server"),
-        help="Advertised MCP server name.",
-    )
-    parser.add_argument(
-        "--envector-endpoint", "--envector-address",
-        dest="envector_endpoint",
-        default=os.getenv("ENVECTOR_ENDPOINT") or os.getenv("ENVECTOR_ADDRESS"),
-        help="enVector endpoint (host:port or URL).",
-    )
-    parser.add_argument(
-        "--envector-key-id",
-        default=os.getenv("ENVECTOR_KEY_ID", "vault-key"),
-        help="enVector key identifier.",
-    )
-    parser.add_argument(
-        "--envector-key-path",
-        default=os.getenv("ENVECTOR_KEY_PATH", os.path.join(CURRENT_DIR, "keys")),
-        help="Path to the enVector key directory.",
-    )
-    parser.add_argument(
-        "--envector-eval-mode",
-        default=os.getenv("ENVECTOR_EVAL_MODE", "rmp"),
-        help="enVector evaluation mode (e.g., 'rmp', 'mm').",
-    )
-    parser.add_argument(
-        "--encrypted-query",
-        action="store_true",
-        default=os.getenv("ENVECTOR_ENCRYPTED_QUERY", "false").lower() in ("true", "1", "yes"),
-        help="Encrypt the query vectors."
-    )
-    parser.add_argument(
-        "--no-auto-key-setup",
-        action="store_true",
-        help="Disable automatic key generation. Use when keys are provided externally (e.g., from Rune-Vault).",
-    )
-    args = parser.parse_args()
-
-    MCP_SERVER_NAME = args.server_name
-    ENVECTOR_ENDPOINT = args.envector_endpoint or ""
-    ENVECTOR_API_KEY = os.getenv("ENVECTOR_API_KEY", None)
-    ENVECTOR_KEY_ID = args.envector_key_id
-    ENVECTOR_KEY_PATH = args.envector_key_path
-    ENVECTOR_EVAL_MODE = args.envector_eval_mode
-    ENCRYPTED_QUERY = args.encrypted_query
-
-    # ── Load ~/.rune/config.json if ENVECTOR_CONFIG is set ──
-    _vault_cfg = {}  # populated from config file if available
-    _config_path = os.getenv("ENVECTOR_CONFIG")
-    if _config_path:
-        _config_path = os.path.expanduser(_config_path)
-        if os.path.exists(_config_path):
-            try:
-                with open(_config_path) as _cf:
-                    _rune_config = json.load(_cf)
-                _vault_cfg = _rune_config.get("vault", {})
-                if not os.getenv("RUNEVAULT_ENDPOINT") and (_vault_cfg.get("endpoint") or _vault_cfg.get("url")):
-                    os.environ["RUNEVAULT_ENDPOINT"] = _vault_cfg.get("endpoint") or _vault_cfg["url"]
-                if not os.getenv("RUNEVAULT_TOKEN") and _vault_cfg.get("token"):
-                    os.environ["RUNEVAULT_TOKEN"] = _vault_cfg["token"]
-                logger.info(f"Loaded Rune config from {_config_path}")
-            except Exception as _e:
-                logger.warning(f"Failed to read Rune config {_config_path}: {_e}")
-        else:
-            logger.info(f"Rune config not found at {_config_path}, using env vars only")
-
-    # Rune-Vault Integration
-    _env_var = os.getenv("ENVECTOR_AUTO_KEY_SETUP", "true").lower() in ("true", "1", "yes")
-    AUTO_KEY_SETUP = _env_var and not args.no_auto_key_setup
-    RUNEVAULT_ENDPOINT = os.getenv("RUNEVAULT_ENDPOINT", None)
-    RUNEVAULT_TOKEN = os.getenv("RUNEVAULT_TOKEN", None)
-
-    VAULT_CA_CERT = os.getenv("VAULT_CA_CERT") or _vault_cfg.get("ca_cert", "") or None
-    VAULT_TLS_DISABLE = os.getenv("VAULT_TLS_DISABLE", "").lower() == "true"
-    if not VAULT_TLS_DISABLE:
-        VAULT_TLS_DISABLE = bool(_vault_cfg.get("tls_disable", False))
-
-    VAULT_CONFIGURED = bool(RUNEVAULT_ENDPOINT and RUNEVAULT_TOKEN)
-    VAULT_KEYS_LOADED = False
-    VAULT_INDEX_NAME = None
-    AGENT_ID = None
-    AGENT_DEK = None
-
-    if RUNEVAULT_ENDPOINT and RUNEVAULT_TOKEN:
-        # When Vault is configured (Rune plugin mode), use canonical key path.
-        # key_id is discovered from Vault — no hardcoded default.
-        ENVECTOR_KEY_PATH = MCPServerApp.DEFAULT_KEY_PATH
-
-        logger.info(f"Vault configured — fetching public keys from: {RUNEVAULT_ENDPOINT}")
-        success, vault_index, vault_key_id, vault_agent_id, vault_agent_dek, vault_ev_endpoint, vault_ev_api_key = fetch_keys_from_vault(
-            RUNEVAULT_ENDPOINT, RUNEVAULT_TOKEN,
-            ENVECTOR_KEY_PATH,
-            ca_cert=VAULT_CA_CERT,
-            tls_disable=VAULT_TLS_DISABLE,
-        )
-        if success and vault_key_id:
-            ENVECTOR_KEY_ID = vault_key_id
-            logger.info(f"Vault provided key_id: {ENVECTOR_KEY_ID}")
-            AUTO_KEY_SETUP = False
-            VAULT_KEYS_LOADED = True
-            VAULT_INDEX_NAME = vault_index
-            AGENT_ID = vault_agent_id
-            AGENT_DEK = vault_agent_dek
-            if vault_ev_endpoint:
-                ENVECTOR_ENDPOINT = vault_ev_endpoint
-                logger.info("Using enVector endpoint from Vault bundle")
-            if vault_ev_api_key:
-                ENVECTOR_API_KEY = vault_ev_api_key
-                logger.info("Using enVector API key from Vault bundle")
-            if not vault_ev_endpoint or not vault_ev_api_key:
-                logger.error("Vault bundle missing enVector credentials. Contact your Vault administrator.")
-                _set_dormant_with_reason("envector_not_provisioned")
-        else:
-            logger.error("Failed to fetch keys/key_id from Vault. Operations requiring encryption will fail.")
-            _set_dormant_with_reason("vault_unreachable")
-            AUTO_KEY_SETUP = False
-    elif RUNEVAULT_ENDPOINT and not RUNEVAULT_TOKEN:
-        logger.warning("Vault endpoint provided but no token specified. Skipping Vault integration.")
-        VAULT_CONFIGURED = True
-        AUTO_KEY_SETUP = False
-    elif not AUTO_KEY_SETUP:
-        logger.info(f"Using externally provided keys from: {ENVECTOR_KEY_PATH}")
-
-    envector_adapter = None
-    try:
-        envector_adapter = EnVectorSDKAdapter(
-            address=ENVECTOR_ENDPOINT,
-            key_id=ENVECTOR_KEY_ID,
-            key_path=ENVECTOR_KEY_PATH,
-            eval_mode=ENVECTOR_EVAL_MODE,
-            query_encryption=ENCRYPTED_QUERY,
-            access_token=ENVECTOR_API_KEY,
-            auto_key_setup=AUTO_KEY_SETUP,
-        )
-    except Exception as e:
-        logger.warning(f"enVector adapter init failed (server will start in degraded mode): {e}")
-
-    vault_client = None
-    if RUNEVAULT_ENDPOINT and RUNEVAULT_TOKEN:
-        logger.info(f"Initializing Vault client: {RUNEVAULT_ENDPOINT}")
-        vault_client = VaultClient(
-            vault_endpoint=RUNEVAULT_ENDPOINT,
-            vault_token=RUNEVAULT_TOKEN,
-            ca_cert=VAULT_CA_CERT,
-            tls_disable=VAULT_TLS_DISABLE,
-        )
-        logger.info("Vault client initialized - recall tool available")
-    else:
-        logger.info("Vault not configured - recall tool will be unavailable")
-
-    # ── Create MCP app (pipelines initialized via _init_pipelines) ──
-    app = MCPServerApp(
-        mcp_server_name=MCP_SERVER_NAME,
-        envector_adapter=envector_adapter,
-        vault_client=vault_client,
-        vault_index_name=VAULT_INDEX_NAME,
-        key_path=ENVECTOR_KEY_PATH,
-        key_id=ENVECTOR_KEY_ID,
-        agent_id=AGENT_ID,
-        agent_dek=AGENT_DEK,
-    )
-
-    # Set enVector credentials from Vault bundle on app instance
-    if ENVECTOR_ENDPOINT:
-        app._envector_endpoint = ENVECTOR_ENDPOINT
-    if ENVECTOR_API_KEY:
-        app._envector_api_key = ENVECTOR_API_KEY
-
-    # Initialize pipelines in background — tools are registered immediately,
-    # pipeline-dependent tools wait via _ensure_pipelines().
-    threading.Thread(
-        target=app._init_pipelines_background,
-        name="rune-pipeline-init",
-        daemon=True,
-    ).start()
-    logger.info("Pipeline initialization started in background")
-
-    def _handle_shutdown(signum, frame):
-        # Close stdin fd to unblock the anyio worker thread that is stuck on
-        # readline().  Without this, Py_FinalizeEx tries to GC the same
-        # TextIOWrapper whose buffer lock the worker thread still holds,
-        # triggering "could not acquire lock for <BufferedReader>" → abort().
-        try:
-            os.close(0)
-        except OSError:
-            pass
-        os._exit(0)
-    for sig in (signal.SIGINT, getattr(signal, "SIGTERM", None)):
-        if sig is not None:
-            signal.signal(sig, _handle_shutdown)
-
-    app.run()
diff --git a/mcp/tests/__init__.py b/mcp/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/mcp/tests/test_errors.py b/mcp/tests/test_errors.py
deleted file mode 100644
index 3a78215..0000000
--- a/mcp/tests/test_errors.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# tests/test_errors.py
-import pytest
-from server.errors import (
-    RuneError,
-    VaultConnectionError,
-    VaultDecryptionError,
-    EnvectorConnectionError,
-    EnvectorInsertError,
-    PipelineNotReadyError,
-    InvalidInputError,
-    make_error,
-)
-
-class TestRuneErrorHierarchy:
-    def test_base_defaults(self):
-        e = RuneError("boom")
-        assert str(e) == "boom"
-        assert e.code == "INTERNAL_ERROR"
-        assert e.retryable is False
-        assert e.recovery_hint == ""
-
-    def test_vault_connection_error(self):
-        e = VaultConnectionError("unreachable")
-        assert isinstance(e, RuneError)
-        assert e.code == "VAULT_CONNECTION_ERROR"
-        assert e.retryable is True
-        assert "Vault is unreachable" in e.recovery_hint
-        assert "/rune:status" in e.recovery_hint
-
-    def test_vault_decryption_error(self):
-        e = VaultDecryptionError("bad token")
-        assert isinstance(e, RuneError)
-        assert e.code == "VAULT_DECRYPTION_ERROR"
-        assert e.retryable is False
-        assert "token" in e.recovery_hint.lower()
-        assert "/rune:configure" in e.recovery_hint
-
-    def test_envector_connection_error(self):
-        e = EnvectorConnectionError("timeout")
-        assert isinstance(e, RuneError)
-        assert e.code == "ENVECTOR_CONNECTION_ERROR"
-        assert e.retryable is True
-        assert "enVector" in e.recovery_hint
-
-    def test_envector_insert_error(self):
-        e = EnvectorInsertError("index not found")
-        assert isinstance(e, RuneError)
-        assert e.code == "ENVECTOR_INSERT_ERROR"
-        assert e.retryable is True
-        assert "retry" in e.recovery_hint.lower()
-
-    def test_pipeline_not_ready_error(self):
-        e = PipelineNotReadyError("scribe not initialized")
-        assert isinstance(e, RuneError)
-        assert e.code == "PIPELINE_NOT_READY"
-        assert e.retryable is False
-        assert "/rune:activate" in e.recovery_hint
-
-    def test_invalid_input_error(self):
-        e = InvalidInputError("topk too large")
-        assert isinstance(e, RuneError)
-        assert e.code == "INVALID_INPUT"
-        assert e.retryable is False
-
-    def test_override_code_and_retryable(self):
-        e = RuneError("custom", code="CUSTOM_CODE", retryable=True)
-        assert e.code == "CUSTOM_CODE"
-        assert e.retryable is True
-
-    def test_override_recovery_hint(self):
-        e = VaultConnectionError("unreachable", recovery_hint="Custom hint for this case.")
-        assert e.recovery_hint == "Custom hint for this case."
-
-    def test_base_error_no_recovery_hint(self):
-        e = RuneError("generic failure")
-        assert e.recovery_hint == ""
-
-
-class TestMakeError:
-    def test_rune_error_produces_structured_dict(self):
-        result = make_error(VaultConnectionError("cannot reach vault"))
-        assert result["ok"] is False
-        assert result["error"]["code"] == "VAULT_CONNECTION_ERROR"
-        assert result["error"]["message"] == "cannot reach vault"
-        assert result["error"]["retryable"] is True
-        assert "recovery_hint" in result["error"]
-        assert "/rune:status" in result["error"]["recovery_hint"]
-
-    def test_generic_exception_falls_back(self):
-        result = make_error(RuntimeError("unexpected"))
-        assert result["ok"] is False
-        assert result["error"]["code"] == "INTERNAL_ERROR"
-        assert result["error"]["message"] == "unexpected"
-        assert result["error"]["retryable"] is False
-        assert "recovery_hint" not in result["error"]
-
-    def test_invalid_input_error(self):
-        result = make_error(InvalidInputError("topk must be 10 or less"))
-        assert result["ok"] is False
-        assert result["error"]["code"] == "INVALID_INPUT"
-        assert result["error"]["retryable"] is False
-
-    def test_pipeline_not_ready_error(self):
-        result = make_error(PipelineNotReadyError("Retriever not initialized"))
-        assert result["ok"] is False
-        assert result["error"]["code"] == "PIPELINE_NOT_READY"
-        assert result["error"]["message"] == "Retriever not initialized"
-        assert result["error"]["retryable"] is False
-        assert "/rune:activate" in result["error"]["recovery_hint"]
-
-    def test_custom_recovery_hint_in_make_error(self):
-        e = EnvectorConnectionError("timeout", recovery_hint="Custom: check your firewall settings.")
-        result = make_error(e)
-        assert result["error"]["recovery_hint"] == "Custom: check your firewall settings."
-
-    def test_empty_recovery_hint_omitted(self):
-        e = RuneError("generic")
-        result = make_error(e)
-        assert "recovery_hint" not in result["error"]
diff --git a/mcp/tests/test_server.py b/mcp/tests/test_server.py
deleted file mode 100644
index 297ed70..0000000
--- a/mcp/tests/test_server.py
+++ /dev/null
@@ -1,538 +0,0 @@
-# tests/test_server.py
-import os
-import sys
-import pytest
-from unittest.mock import patch
-
-from typing import Union, List, Any, Dict, Optional
-
-import numpy as np
-
-# Add mcp directory (rune/mcp/) to import path
-MCP_ROOT = os.path.dirname(os.path.dirname(__file__))
-if MCP_ROOT not in sys.path:
-    sys.path.insert(0, MCP_ROOT)
-
-from fastmcp import Client
-from server.server import MCPServerApp
-from adapter import EnVectorSDKAdapter
-from adapter.vault_client import VaultClient, DecryptResult
-
-
-class FakeEmbeddingService:
-    """Fake embedding service matching EmbeddingService.embed() API."""
-    def embed(self, texts: List[str]) -> List[List[float]]:
-        return [[0.1, 0.2, 0.3] * (i+1) for i in range(len(texts))]
-
-    def embed_single(self, text: str) -> List[float]:
-        return self.embed([text])[0]
-
-@pytest.fixture
-def mcp_server():
-    """
-    Create and return a FastMCP server instance for testing.
-    Inject a fake adapter to avoid using the actual enVector SDK.
-    """
-    class FakeAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass  # Actual initialization not needed
-
-        # ----------- Mocked method: Get Index List ----------- #
-        def invoke_get_index_list(self) -> List[str]:
-            return ["index_a", "index_b"]
-
-        # ----------- Mocked method: Insert ----------- #
-        def invoke_insert(
-                self,
-                index_name: str,
-                vectors: List[List[float]],
-                metadata: Union[Any, List[Any]] = None
-            ) -> Dict[str, Any]:
-            return {"index_name": index_name, "vectors": vectors, "metadata": metadata}
-
-    app = MCPServerApp(envector_adapter=FakeAdapter(), mcp_server_name="test-mcp")
-    app.embedding = FakeEmbeddingService()
-    app._pipelines_ready.set()
-    return app.mcp  # FastMCP Instance
-
-
-# ----------- Low-Level Tool Removal Verification ----------- #
-@pytest.mark.asyncio
-async def test_low_level_tools_not_registered(mcp_server):
-    """Low-level enVector tools must never be exposed."""
-    async with Client(mcp_server) as client:
-        tools = await client.list_tools()
-        names = [t.name for t in tools]
-        forbidden = ["create_index", "get_index_list", "get_index_info",
-                      "insert", "insert_documents_from_path",
-                      "insert_documents_from_text", "search"]
-        for tool_name in forbidden:
-            assert tool_name not in names, f"{tool_name} should not be registered"
-
-
-# =========================================================================== #
-#  Fake Vault Client for testing vault_status tools
-# =========================================================================== #
-
-class FakeVaultClient(VaultClient):
-    """Fake Vault client that returns deterministic results without network calls."""
-    def __init__(self):
-        # Skip real __init__ to avoid network setup
-        self.vault_endpoint = "http://fake-vault:50080"
-        self.vault_token = "fake-token"
-        self.timeout = 5.0
-        self._grpc_target = "fake-vault:50051"
-        self._channel = None
-        self._stub = None
-        self._ca_cert = None
-        self._tls_disable = True
-
-    async def get_public_key(self) -> dict:
-        return {"EncKey.json": "{}", "EvalKey.json": "{}", "index_name": "team-decisions"}
-
-    async def health_check(self) -> bool:
-        return True
-
-    async def decrypt_search_results(
-        self,
-        encrypted_blob_b64: str,
-        top_k: int = 5,
-    ) -> DecryptResult:
-        return DecryptResult(
-            ok=True,
-            results=[
-                {"shard_idx": 0, "row_idx": 0, "score": 0.95},
-                {"shard_idx": 0, "row_idx": 1, "score": 0.80},
-            ][:top_k],
-        )
-
-    async def decrypt_metadata(
-        self,
-        encrypted_metadata_list: List[str],
-    ) -> List:
-        return [f"decrypted_{i}" for i in range(len(encrypted_metadata_list))]
-
-
-@pytest.fixture
-def mcp_server_with_vault():
-    """
-    MCP server fixture with a fake Vault client injected,
-    enabling the `vault_status` tool.
-    """
-    class FakeAdapterWithVault(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-
-        # --- existing mocked methods (same as FakeAdapter) ---
-        def invoke_get_index_list(self) -> List[str]:
-            return ["index_a", "index_b"]
-
-        def invoke_insert(self, index_name: str, vectors, metadata=None):
-            return {"index_name": index_name, "vectors": vectors, "metadata": metadata}
-
-    app = MCPServerApp(
-        envector_adapter=FakeAdapterWithVault(),
-        mcp_server_name="test-mcp-vault",
-        vault_client=FakeVaultClient(),
-        vault_index_name="team-decisions",
-    )
-    app.embedding = FakeEmbeddingService()
-    app._pipelines_ready.set()
-    return app.mcp
-
-
-# ----------- Vault Status Tool Tests ----------- #
-
-@pytest.mark.asyncio
-async def test_tools_list_contains_vault_status(mcp_server_with_vault):
-    async with Client(mcp_server_with_vault) as client:
-        tools = await client.list_tools()
-        names = [t.name for t in tools]
-        assert "vault_status" in names
-
-
-@pytest.mark.asyncio
-async def test_vault_status_with_vault_configured(mcp_server_with_vault):
-    async with Client(mcp_server_with_vault) as client:
-        result = await client.call_tool("vault_status", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None, "No data returned from tool call"
-        assert data.get("ok") is True
-        assert data.get("vault_configured") is True
-        assert data.get("secure_search_available") is True
-        assert data.get("mode") == "secure (Vault-backed)"
-
-
-@pytest.mark.asyncio
-async def test_vault_status_without_vault(mcp_server):
-    """When no vault_client is injected the tool should report standard mode."""
-    async with Client(mcp_server) as client:
-        result = await client.call_tool("vault_status", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None, "No data returned from tool call"
-        assert data.get("ok") is True
-        assert data.get("vault_configured") is False
-        assert data.get("secure_search_available") is False
-
-
-@pytest.mark.asyncio
-async def test_vault_status_includes_team_index_name(mcp_server_with_vault):
-    """vault_status should expose the team_index_name field."""
-    async with Client(mcp_server_with_vault) as client:
-        result = await client.call_tool("vault_status", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None, "No data returned from tool call"
-        assert data.get("ok") is True
-        assert data.get("team_index_name") == "team-decisions"
-
-
-# ----------- Degraded Mode Tests (no enVector adapter) ----------- #
-
-@pytest.fixture
-def mcp_server_degraded():
-    """MCP server with envector_adapter=None, simulating startup when infra is down."""
-    app = MCPServerApp(mcp_server_name="test-mcp-degraded")
-    app._pipelines_ready.set()
-    return app.mcp
-
-
-@pytest.mark.asyncio
-async def test_degraded_server_starts_and_lists_tools(mcp_server_degraded):
-    """Server should start and register all tools even without enVector adapter."""
-    async with Client(mcp_server_degraded) as client:
-        tools = await client.list_tools()
-        names = [t.name for t in tools]
-        assert "reload_pipelines" in names
-        assert "capture" in names
-        assert "recall" in names
-        assert "vault_status" in names
-
-
-# ----------- Reload Pipelines Tool Tests ----------- #
-
-@pytest.mark.asyncio
-async def test_tools_list_contains_reload_pipelines(mcp_server):
-    async with Client(mcp_server) as client:
-        tools = await client.list_tools()
-        names = [t.name for t in tools]
-        assert "reload_pipelines" in names
-
-
-@pytest.mark.asyncio
-async def test_reload_pipelines_without_active_config(mcp_server):
-    """When state is not active, pipelines should remain None."""
-    async with Client(mcp_server) as client:
-        with patch.dict(os.environ, {"RUNE_STATE": "dormant"}):
-            result = await client.call_tool("reload_pipelines", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        assert data.get("scribe_initialized") is False
-        assert data.get("retriever_initialized") is False
-
-# Pre-warm tests
-
-@pytest.mark.asyncio
-async def test_reload_pipelines_returns_envector_warmup(mcp_server):
-    async with Client(mcp_server) as client:
-        result = await client.call_tool("reload_pipelines", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        # When pipelines aren't active (dormant), warmup may be None/empty
-        # but the field should still be present in the response
-        assert "envector_warmup" in data
-
-
-@pytest.mark.asyncio
-async def test_reload_pipelines_warmup_failure():
-    class FailingAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-
-        def invoke_get_index_list(self) -> List[str]:
-            raise ConnectionError("UNAVAILABLE: could not connect")
-
-    app = MCPServerApp(envector_adapter=FailingAdapter(), mcp_server_name="test-mcp-warmup-fail")
-    app.embedding = FakeEmbeddingService()
-    app._pipelines_ready.set()
-    # Force _scribe to be truthy so warmup path is triggered
-    app._scribe = {"record_builder": None, "envector_client": None, "embedding_service": None}
-
-    async with Client(app.mcp) as client:
-        result = await client.call_tool("reload_pipelines", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        warmup = data.get("envector_warmup")
-        # _init_pipelines resets _scribe to None (dormant), so warmup may be None;
-        # if the warmup path runs, it should report the failure
-        if warmup is not None:
-            assert warmup.get("ok") is False
-            assert "error" in warmup
-
-
-# ----------- Diagnostic Tool Tests ----------- #
-
-@pytest.mark.asyncio
-async def test_tools_list_contains_diagnostics(mcp_server_with_vault):
-    async with Client(mcp_server_with_vault) as client:
-        tools = await client.list_tools()
-        names = [t.name for t in tools]
-        assert "diagnostics" in names
-
-
-@pytest.mark.asyncio
-async def test_diagnostics_with_vault(mcp_server_with_vault):
-    async with Client(mcp_server_with_vault) as client:
-        result = await client.call_tool("diagnostics", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        vault = data.get("vault", {})
-        assert vault.get("configured") is True
-        assert vault.get("healthy") is True
-        assert vault.get("endpoint") == "http://fake-vault:50080"
-
-        keys = data.get("keys", {})
-        assert "enc_key_loaded" in keys
-        assert "key_id" in keys
-        assert "agent_dek_loaded" in keys
-
-        pipelines = data.get("pipelines", {})
-        assert "scribe" in pipelines
-        assert "retriever" in pipelines
-
-        envector = data.get("envector", {})
-        assert envector.get("reachable") is True
-        assert envector.get("latency_ms") is not None
-
-
-@pytest.mark.asyncio
-async def test_diagnostics_without_vault(mcp_server):
-    async with Client(mcp_server) as client:
-        result = await client.call_tool("diagnostics", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        vault = data.get("vault", {})
-        assert vault.get("configured") is False
-        assert vault.get("healthy") is False
-
-        keys = data.get("keys", {})
-        assert keys.get("enc_key_loaded") is False
-        assert keys.get("agent_dek_loaded") is False
-
-        pipelines = data.get("pipelines", {})
-        assert pipelines.get("scribe") is False
-        assert pipelines.get("retriever") is False
-
-        envector = data.get("envector", {})
-        assert envector.get("reachable") is True
-        assert envector.get("latency_ms") is not None
-
-
-@pytest.mark.asyncio
-async def test_diagnostics_no_envector(mcp_server_degraded):
-    async with Client(mcp_server_degraded) as client:
-        result = await client.call_tool("diagnostics", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        vault = data.get("vault", {})
-        assert vault.get("configured") is False
-        assert vault.get("healthy") is False
-
-        keys = data.get("keys", {})
-        assert keys.get("enc_key_loaded") is False
-        assert keys.get("agent_dek_loaded") is False
-
-        pipelines = data.get("pipelines", {})
-        assert pipelines.get("scribe") is False
-        assert pipelines.get("retriever") is False
-
-        envector = data.get("envector", {})
-        assert envector.get("reachable") is False
-
-
-# enVector connection related tests
-
-@pytest.fixture
-def mcp_server_envector_timeout():
-    import time
-
-    class SlowAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-
-        def invoke_get_index_list(self) -> List[str]:
-            time.sleep(10)  # Longer than ENVECTOR_DIAGNOSIS_TIMEOUT (5s)
-            return []
-
-    app = MCPServerApp(envector_adapter=SlowAdapter(), mcp_server_name="test-mcp-slow")
-    app.embedding = FakeEmbeddingService()
-    app._pipelines_ready.set()
-    return app.mcp
-
-
-@pytest.fixture
-def mcp_server_envector_connection_error():
-    class ErrorAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-
-        def invoke_get_index_list(self) -> List[str]:
-            raise ConnectionError("UNAVAILABLE: Connection refused to cloud.envector.io:443")
-
-    app = MCPServerApp(envector_adapter=ErrorAdapter(), mcp_server_name="test-mcp-err")
-    app.embedding = FakeEmbeddingService()
-    app._pipelines_ready.set()
-    return app.mcp
-
-
-@pytest.fixture
-def mcp_server_envector_auth_error():
-    class AuthErrorAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-
-        def invoke_get_index_list(self) -> List[str]:
-            raise Exception("UNAUTHENTICATED: invalid API key")
-
-    app = MCPServerApp(envector_adapter=AuthErrorAdapter(), mcp_server_name="test-mcp-auth")
-    app.embedding = FakeEmbeddingService()
-    app._pipelines_ready.set()
-    return app.mcp
-
-
-@pytest.mark.asyncio
-async def test_diagnostics_envector_timeout(mcp_server_envector_timeout):
-    async with Client(mcp_server_envector_timeout) as client:
-        result = await client.call_tool("diagnostics", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        envector = data.get("envector", {})
-        assert envector.get("reachable") is False
-        assert envector.get("error_type") == "timeout"
-        assert "elapsed_ms" in envector
-        assert "timed out" in envector.get("error", "").lower()
-
-
-@pytest.mark.asyncio
-async def test_diagnostics_envector_connection_refused(mcp_server_envector_connection_error):
-    async with Client(mcp_server_envector_connection_error) as client:
-        result = await client.call_tool("diagnostics", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        envector = data.get("envector", {})
-        assert envector.get("reachable") is False
-        assert envector.get("error_type") == "connection_refused"
-        assert "hint" in envector
-        assert "endpoint" in envector["hint"].lower()
-
-
-@pytest.mark.asyncio
-async def test_diagnostics_envector_auth_failure(mcp_server_envector_auth_error):
-    async with Client(mcp_server_envector_auth_error) as client:
-        result = await client.call_tool("diagnostics", {})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        envector = data.get("envector", {})
-        assert envector.get("reachable") is False
-        assert envector.get("error_type") == "auth_failure"
-        assert "hint" in envector
-
-
-# ----------- Error Response Tests ----------- #
-
-@pytest.mark.asyncio
-async def test_capture_returns_structured_error_when_pipeline_not_ready(mcp_server):
-    async with Client(mcp_server) as client:
-        result = await client.call_tool("capture", {"text": "test decision"})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        assert data.get("ok") is False
-        error = data.get("error")
-        assert isinstance(error, dict), f"Expected structured error dict, got: {type(error)}"
-        assert error.get("code") == "PIPELINE_NOT_READY"
-        assert error.get("retryable") is False
-        assert "Scribe" in error.get("message", "")
-
-
-@pytest.mark.asyncio
-async def test_recall_returns_structured_error_when_pipeline_not_ready(mcp_server):
-    async with Client(mcp_server) as client:
-        result = await client.call_tool("recall", {"query": "test query"})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        assert data.get("ok") is False
-        error = data.get("error")
-        assert isinstance(error, dict), f"Expected structured error dict, got: {type(error)}"
-        assert error.get("code") == "PIPELINE_NOT_READY"
-        assert error.get("retryable") is False
-        assert "Retriever" in error.get("message", "")
-
-
-@pytest.mark.asyncio
-async def test_recall_returns_structured_error_for_invalid_topk(mcp_server_with_vault):
-    async with Client(mcp_server_with_vault) as client:
-        result = await client.call_tool("recall", {"query": "test", "topk": 100})
-        data = getattr(result, "data", None) or getattr(result, "structured", None) \
-               or getattr(result, "structured_content", None)
-
-        assert data is not None
-        assert data.get("ok") is False
-        error = data.get("error")
-        assert isinstance(error, dict), f"Expected structured error dict, got: {type(error)}"
-        assert error.get("code") in ("PIPELINE_NOT_READY", "INVALID_INPUT")
-        assert isinstance(error.get("retryable"), bool)
-
-
-# ----------- Background Pipeline Init Tests ----------- #
-
-@pytest.mark.asyncio
-async def test_ensure_pipelines_returns_none_when_ready():
-    """_ensure_pipelines returns None when pipelines are already initialized."""
-    class FakeAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-    app = MCPServerApp(envector_adapter=FakeAdapter(), mcp_server_name="test-mcp")
-    app._pipelines_ready.set()
-    result = app._ensure_pipelines(timeout=0.1)
-    assert result is None
-
-
-@pytest.mark.asyncio
-async def test_ensure_pipelines_returns_error_on_timeout():
-    """_ensure_pipelines returns error dict when init times out."""
-    class FakeAdapter(EnVectorSDKAdapter):
-        def __init__(self):
-            pass
-    app = MCPServerApp(envector_adapter=FakeAdapter(), mcp_server_name="test-mcp")
-    # Don't set _pipelines_ready — simulate still initializing
-    result = app._ensure_pipelines(timeout=0.01)
-    assert result is not None
-    assert result["ok"] is False
-    assert "in progress" in result["error"]["message"].lower()
diff --git a/mcp/tests/test_vault_client.py b/mcp/tests/test_vault_client.py
deleted file mode 100644
index e6a284f..0000000
--- a/mcp/tests/test_vault_client.py
+++ /dev/null
@@ -1,250 +0,0 @@
-# tests/test_vault_client.py
-import json
-import pytest
-from dataclasses import dataclass, field
-from typing import List, Optional
-from unittest.mock import AsyncMock, MagicMock
-
-import os
-import sys
-
-MCP_ROOT = os.path.dirname(os.path.dirname(__file__))
-if MCP_ROOT not in sys.path:
-    sys.path.insert(0, MCP_ROOT)
-
-from adapter.vault_client import VaultClient, VaultError, DecryptResult
-
-
-# Fake protobuf response
-
-@dataclass
-class FakeGetPublicKeyResponse:
-    key_bundle_json: str = ""
-    error: str = ""
-
-
-@dataclass
-class FakeScoreEntry:
-    shard_idx: int = 0
-    row_idx: int = 0
-    score: float = 0.0
-
-
-@dataclass
-class FakeDecryptScoresResponse:
-    results: List[FakeScoreEntry] = field(default_factory=list)
-    error: str = ""
-
-
-@dataclass
-class FakeDecryptMetadataResponse:
-    decrypted_metadata: List[str] = field(default_factory=list)
-    error: str = ""
-
-
-# Helpers
-
-def _make_client_with_mock_stub() -> tuple[VaultClient, MagicMock]:
-    client = VaultClient(
-        vault_endpoint="tcp://fake-vault:50051",
-        vault_token="test-token",
-        tls_disable=True,
-    )
-    mock_stub = MagicMock()
-    client._channel = MagicMock()
-    client._stub = mock_stub
-    return client, mock_stub
-
-
-# Tests
-
-class TestGetPublicKey:
-
-    @pytest.mark.asyncio
-    async def test_valid_json(self):
-        client, stub = _make_client_with_mock_stub()
-        bundle = {"EncKey.json": "enc...", "EvalKey.json": "eval...", "index_name": "team-idx"}
-        stub.GetPublicKey = AsyncMock(return_value=FakeGetPublicKeyResponse(
-            key_bundle_json=json.dumps(bundle),
-            error="",
-        ))
-
-        result = await client.get_public_key()
-        assert result == bundle
-
-    @pytest.mark.asyncio
-    async def test_invalid_json_raises_vault_error(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.GetPublicKey = AsyncMock(return_value=FakeGetPublicKeyResponse(
-            key_bundle_json="NOT VALID JSON {{{",
-            error="",
-        ))
-
-        with pytest.raises(VaultError, match="GetPublicKey returned invalid JSON"):
-            await client.get_public_key()
-
-    @pytest.mark.asyncio
-    async def test_invalid_json_preserves_cause(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.GetPublicKey = AsyncMock(return_value=FakeGetPublicKeyResponse(
-            key_bundle_json="<<<broken>>>",
-            error="",
-        ))
-
-        with pytest.raises(VaultError) as exc_info:
-            await client.get_public_key()
-        assert isinstance(exc_info.value.__cause__, (json.JSONDecodeError, ValueError))
-
-    @pytest.mark.asyncio
-    async def test_empty_json_string_raises_vault_error(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.GetPublicKey = AsyncMock(return_value=FakeGetPublicKeyResponse(
-            key_bundle_json="",
-            error="",
-        ))
-
-        with pytest.raises(VaultError, match="GetPublicKey returned invalid JSON"):
-            await client.get_public_key()
-
-    @pytest.mark.asyncio
-    async def test_server_error_field(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.GetPublicKey = AsyncMock(return_value=FakeGetPublicKeyResponse(
-            key_bundle_json="",
-            error="token expired",
-        ))
-
-        with pytest.raises(VaultError, match="GetPublicKey failed: token expired"):
-            await client.get_public_key()
-
-    @pytest.mark.asyncio
-    async def test_grpc_error(self):
-        import grpc.aio
-        client, stub = _make_client_with_mock_stub()
-
-        rpc_error = grpc.aio.AioRpcError(
-            code=grpc.StatusCode.UNAVAILABLE,
-            initial_metadata=grpc.aio.Metadata(),
-            trailing_metadata=grpc.aio.Metadata(),
-            details="Connection refused",
-        )
-        stub.GetPublicKey = AsyncMock(side_effect=rpc_error)
-
-        with pytest.raises(VaultError, match="gRPC GetPublicKey failed"):
-            await client.get_public_key()
-
-
-class TestDecryptMetadata:
-
-    @pytest.mark.asyncio
-    async def test_valid_json_entries(self):
-        client, stub = _make_client_with_mock_stub()
-        entries = [
-            json.dumps({"decision": "use Postgres", "confidence": 0.9}),
-            json.dumps({"decision": "adopt gRPC", "confidence": 0.85}),
-        ]
-        stub.DecryptMetadata = AsyncMock(return_value=FakeDecryptMetadataResponse(
-            decrypted_metadata=entries,
-            error="",
-        ))
-
-        result = await client.decrypt_metadata(["enc1", "enc2"])
-        assert len(result) == 2
-        assert result[0]["decision"] == "use Postgres"
-        assert result[1]["decision"] == "adopt gRPC"
-
-    @pytest.mark.asyncio
-    async def test_one_bad_entry_raises_vault_error(self):
-        client, stub = _make_client_with_mock_stub()
-        entries = [
-            json.dumps({"decision": "valid"}),
-            "NOT JSON AT ALL",  # bad entry
-        ]
-        stub.DecryptMetadata = AsyncMock(return_value=FakeDecryptMetadataResponse(
-            decrypted_metadata=entries,
-            error="",
-        ))
-
-        with pytest.raises(VaultError, match="DecryptMetadata returned invalid JSON in metadata entry"):
-            await client.decrypt_metadata(["enc1", "enc2"])
-
-    @pytest.mark.asyncio
-    async def test_bad_entry_preserves_cause(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.DecryptMetadata = AsyncMock(return_value=FakeDecryptMetadataResponse(
-            decrypted_metadata=["{{broken}}"],
-            error="",
-        ))
-
-        with pytest.raises(VaultError) as exc_info:
-            await client.decrypt_metadata(["enc1"])
-        assert isinstance(exc_info.value.__cause__, (json.JSONDecodeError, ValueError))
-
-    @pytest.mark.asyncio
-    async def test_empty_list(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.DecryptMetadata = AsyncMock(return_value=FakeDecryptMetadataResponse(
-            decrypted_metadata=[],
-            error="",
-        ))
-
-        result = await client.decrypt_metadata([])
-        assert result == []
-
-    @pytest.mark.asyncio
-    async def test_server_error_field(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.DecryptMetadata = AsyncMock(return_value=FakeDecryptMetadataResponse(
-            decrypted_metadata=[],
-            error="decryption key not found",
-        ))
-
-        with pytest.raises(VaultError, match="DecryptMetadata failed: decryption key not found"):
-            await client.decrypt_metadata(["enc1"])
-
-    @pytest.mark.asyncio
-    async def test_grpc_error(self):
-        import grpc.aio
-        client, stub = _make_client_with_mock_stub()
-
-        rpc_error = grpc.aio.AioRpcError(
-            code=grpc.StatusCode.DEADLINE_EXCEEDED,
-            initial_metadata=grpc.aio.Metadata(),
-            trailing_metadata=grpc.aio.Metadata(),
-            details="Deadline exceeded",
-        )
-        stub.DecryptMetadata = AsyncMock(side_effect=rpc_error)
-
-        with pytest.raises(VaultError, match="gRPC DecryptMetadata failed"):
-            await client.decrypt_metadata(["enc1"])
-
-
-class TestDecryptSearchResults:
-
-    @pytest.mark.asyncio
-    async def test_valid_response(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.DecryptScores = AsyncMock(return_value=FakeDecryptScoresResponse(
-            results=[
-                FakeScoreEntry(shard_idx=0, row_idx=3, score=0.95),
-                FakeScoreEntry(shard_idx=0, row_idx=7, score=0.82),
-            ],
-            error="",
-        ))
-
-        result = await client.decrypt_search_results("base64blob", top_k=2)
-        assert result.ok is True
-        assert len(result.results) == 2
-        assert result.results[0]["score"] == 0.95
-
-    @pytest.mark.asyncio
-    async def test_server_error(self):
-        client, stub = _make_client_with_mock_stub()
-        stub.DecryptScores = AsyncMock(return_value=FakeDecryptScoresResponse(
-            results=[],
-            error="invalid ciphertext",
-        ))
-
-        result = await client.decrypt_search_results("bad-blob")
-        assert result.ok is False
-        assert result.error == "invalid ciphertext"
diff --git a/mcp/tests/test_vault_direct.py b/mcp/tests/test_vault_direct.py
deleted file mode 100644
index f1ffcce..0000000
--- a/mcp/tests/test_vault_direct.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python3
-"""
-Direct Vault Test using FastMCP Client (Legacy)
-
-This test bypasses vault_client.py and uses FastMCP Client directly
-to validate that the Vault MCP (legacy HTTP) server works correctly.
-For gRPC tests, use vault_client.py directly.
-
-Required environment variables:
-  RUNEVAULT_ENDPOINT  Rune-Vault gRPC target (e.g. vault-host:50051)
-  RUNEVAULT_TOKEN     Vault authentication token
-"""
-
-import os
-import sys
-import json
-import asyncio
-
-# Add srcs to path
-ROOT = os.path.dirname(os.path.dirname(__file__))
-SRCS = os.path.join(ROOT, "srcs")
-if SRCS not in sys.path:
-    sys.path.append(SRCS)
-
-from fastmcp import Client
-
-# Configuration (from environment — no hardcoded credentials)
-RUNEVAULT_ENDPOINT = os.environ.get("RUNEVAULT_ENDPOINT", "")
-RUNEVAULT_TOKEN = os.environ.get("RUNEVAULT_TOKEN", "")
-import pytest
-
-if not RUNEVAULT_ENDPOINT or not RUNEVAULT_TOKEN:
-    pytest.skip("Missing required environment variables: RUNEVAULT_ENDPOINT, RUNEVAULT_TOKEN", allow_module_level=True)
-# Ensure endpoint ends with /mcp
-if not RUNEVAULT_ENDPOINT.endswith("/mcp"):
-    RUNEVAULT_ENDPOINT = RUNEVAULT_ENDPOINT.rstrip("/") + "/mcp"
-
-# Sample encrypted blob (from previous test)
-SAMPLE_BLOB = "Cg1pZC05ODZhMTdhNGFlEgKHARqogAQIgCASoYAEAAUAAAAAAAAABQAAAAAA"
-
-print("=" * 80)
-print("Direct Vault Test with FastMCP Client")
-print("=" * 80)
-print(f"Vault Endpoint: {RUNEVAULT_ENDPOINT}")
-print("=" * 80)
-
-@pytest.mark.asyncio
-async def test_vault_tools():
-    """Test Vault MCP tools using FastMCP Client."""
-
-    print("\n[1/3] Connecting to Vault MCP...")
-    try:
-        async with Client(RUNEVAULT_ENDPOINT) as client:
-            print("  ✓ Connected to Vault MCP")
-
-            # List available tools
-            print("\n[2/3] Listing available tools...")
-            tools = await client.list_tools()
-            print(f"  ✓ Found {len(tools)} tools:")
-            for tool in tools:
-                print(f"    - {tool.name}: {tool.description[:60]}...")
-
-            # Test decrypt_scores
-            print("\n[3/3] Testing decrypt_scores tool...")
-            result = await client.call_tool(
-                "decrypt_scores",
-                {
-                    "token": RUNEVAULT_TOKEN,
-                    "encrypted_blob_b64": SAMPLE_BLOB,
-                    "top_k": 3
-                }
-            )
-
-            print(f"  ✓ decrypt_scores returned successfully")
-            print(f"  → Result type: {type(result)}")
-
-            # Extract result data
-            if hasattr(result, 'content'):
-                content = result.content
-                if content and len(content) > 0:
-                    text = content[0].text if hasattr(content[0], 'text') else str(content[0])
-                    print(f"  → Content: {text[:200]}...")
-
-                    # Try to parse as JSON
-                    try:
-                        data = json.loads(text)
-                        print(f"\n  📊 Decryption Result:")
-                        print(f"    - Status: {'SUCCESS' if data.get('ok') else 'FAILED'}")
-                        if data.get('ok'):
-                            results = data.get('results', [])
-                            print(f"    - Results count: {len(results)}")
-                            print(f"    - Request ID: {data.get('request_id')}")
-                            print(f"    - Total vectors: {data.get('total_vectors')}")
-                            print(f"\n    Top-{len(results)} results:")
-                            for i, entry in enumerate(results):
-                                shard = entry.get('shard_idx', '?')
-                                row = entry.get('row_idx', '?')
-                                score = entry.get('score', 0.0)
-                                print(f"      {i+1}. shard={shard}, row={row}, score={score:.4f}")
-                        else:
-                            print(f"    - Error: {data.get('error')}")
-                    except json.JSONDecodeError:
-                        print(f"  → Raw text: {text}")
-            elif hasattr(result, 'data'):
-                print(f"  → Data: {result.data}")
-            else:
-                print(f"  → Result: {result}")
-
-    except Exception as e:
-        print(f"  ✗ Error: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-    return True
-
-# Run the test
-print("\nRunning async test...")
-success = asyncio.run(test_vault_tools())
-
-print("\n" + "=" * 80)
-if success:
-    print("✓ VAULT TEST PASSED")
-    print("=" * 80)
-    print("\nVault MCP server is working correctly!")
-    print("Next: Update vault_client.py to use FastMCP Client")
-else:
-    print("✗ VAULT TEST FAILED")
-    print("=" * 80)
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index d162147..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-# Rune Plugin Dependencies
-
-# FHE Encryption SDK
-pyenvector>=1.2.0
-
-# MCP Server Framework
-fastmcp>=2.2.0
-
-# System Monitoring
-psutil>=5.9.0
-
-# Metrics and Observability
-prometheus-client>=0.16.0
-
-# HTTP Client
-httpx>=0.24.0
-
-# JSON handling
-python-json-logger>=2.0.0
-
-# enVector MCP Server Dependencies
-pydantic[email]>=2.11.7
-python-dotenv>=1.2.1
-fastembed>=0.7.4
-sentence-transformers>=3.0.0
-langchain-text-splitters>=1.0.0
-pypdf>=6.4.1
-
-# Testing (optional)
-pytest>=7.0.0
-pytest-asyncio>=0.18.0
-
-# NumPy (required by pyenvector)
-numpy>=1.24.0
-
-# Agents Dependencies
-fastapi>=0.100.0
-uvicorn>=0.22.0
-anthropic>=0.40.0
-openai>=1.40.0
-google-generativeai>=0.8.0
-slack-sdk>=3.0.0
-
-# Language Detection
-langdetect>=1.0.9
diff --git a/scripts/migrate_embeddings.py b/scripts/migrate_embeddings.py
deleted file mode 100644
index cc37de2..0000000
--- a/scripts/migrate_embeddings.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python3
-"""
-Re-embedding Migration Script
-
-Re-embeds existing records using the new multilingual embedding model.
-Run this after switching from all-MiniLM-L6-v2 to paraphrase-multilingual-MiniLM-L12-v2
-if existing search quality degrades.
-
-Usage:
-    python scripts/migrate_embeddings.py [--dry-run] [--batch-size 50]
-"""
-
-import sys
-import argparse
-from pathlib import Path
-
-# Add project root to path
-PROJECT_ROOT = Path(__file__).parent.parent
-sys.path.insert(0, str(PROJECT_ROOT))
-sys.path.insert(0, str(PROJECT_ROOT / "mcp"))
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Re-embed existing records with multilingual model")
-    parser.add_argument("--dry-run", action="store_true", help="Print what would be done without executing")
-    parser.add_argument("--batch-size", type=int, default=50, help="Number of records to process per batch")
-    parser.add_argument("--collection", type=str, required=True, help="enVector collection/index name (e.g., from Vault)")
-    args = parser.parse_args()
-
-    from agents.common.config import load_config
-    from agents.common.embedding_service import EmbeddingService
-    from agents.common.envector_client import EnVectorClient
-
-    config = load_config()
-
-    print(f"[Migration] Initializing embedding service...")
-    print(f"[Migration] Model: {config.embedding.model}")
-    embedding_svc = EmbeddingService(mode=config.embedding.mode, model=config.embedding.model)
-
-    if not embedding_svc.is_available:
-        print("[Migration] ERROR: Embedding service not available")
-        sys.exit(1)
-
-    # Verify dimension
-    test_vec = embedding_svc.embed_single("test")
-    print(f"[Migration] Embedding dimension: {len(test_vec)}")
-
-    if args.dry_run:
-        print("[Migration] DRY RUN - no changes will be made")
-        print(f"[Migration] Would re-embed records in collection '{args.collection}'")
-        print(f"[Migration] Batch size: {args.batch_size}")
-        return
-
-    print(f"[Migration] Connecting to enVector at {config.envector.endpoint}...")
-    client = EnVectorClient(
-        address=config.envector.endpoint,
-        access_token=config.envector.api_key or None,
-    )
-
-    if not client.is_connected:
-        print("[Migration] ERROR: Could not connect to enVector")
-        sys.exit(1)
-
-    # Fetch all records
-    print("[Migration] Fetching existing records...")
-    try:
-        records = client.list_all(limit=10000)
-    except Exception as e:
-        print(f"[Migration] ERROR: Failed to fetch records: {e}")
-        sys.exit(1)
-
-    total = len(records)
-    print(f"[Migration] Found {total} records to re-embed")
-
-    if total == 0:
-        print("[Migration] No records to migrate")
-        return
-
-    # Process in batches
-    migrated = 0
-    errors = 0
-
-    for i in range(0, total, args.batch_size):
-        batch = records[i:i + args.batch_size]
-        texts = []
-        ids = []
-
-        for record in batch:
-            payload_text = record.get("payload", {}).get("text", "")
-            if payload_text:
-                texts.append(payload_text)
-                ids.append(record.get("id", "unknown"))
-
-        if not texts:
-            continue
-
-        try:
-            print(f"[Migration] Re-embedding batch {i // args.batch_size + 1} ({len(texts)} records)...")
-            embeddings = embedding_svc.embed(texts)
-
-            for record_id, embedding in zip(ids, embeddings):
-                try:
-                    client.update_embedding(record_id, embedding)
-                    migrated += 1
-                except Exception as e:
-                    print(f"[Migration] WARNING: Failed to update {record_id}: {e}")
-                    errors += 1
-
-        except Exception as e:
-            print(f"[Migration] ERROR: Batch embedding failed: {e}")
-            errors += len(texts)
-
-    print(f"[Migration] Complete: {migrated} migrated, {errors} errors, {total} total")
-
-
-if __name__ == "__main__":
-    main()