diff --git a/.mise.ci.toml b/.mise.ci.toml index 38b2877..d1afcbe 100644 --- a/.mise.ci.toml +++ b/.mise.ci.toml @@ -8,4 +8,4 @@ ruff = "0.15" [env] _.python.venv = { path = ".venv", create = true } -PYTHONPATH = "{{config_root}}/vault/proto:{{config_root}}/vault" +PYTHONPATH = "{{config_root}}/vault/_proto:{{config_root}}/vault" diff --git a/.mise.toml b/.mise.toml index c2bbd3f..0913b65 100644 --- a/.mise.toml +++ b/.mise.toml @@ -18,7 +18,7 @@ oci = "3" # ── Environment variables ──────────────────────────────────────────── [env] _.python.venv = { path = ".venv", create = true } -PYTHONPATH = "{{config_root}}/vault/proto:{{config_root}}/vault" +PYTHONPATH = "{{config_root}}/vault/_proto:{{config_root}}/vault" # ── Docker image settings ─────────────────────────────────────────── [vars] diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 2e0d154..789028a 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -115,7 +115,7 @@ Keys are auto-generated on first startup via `ensure_vault()`. ### 2. gRPC Service (API) -Defined in `proto/vault_service.proto` (`rune.vault.v1.VaultService`). +Defined in `_proto/vault_service.proto` (`rune.vault.v1.VaultService`). **Server Configuration**: - Max message size: 256 MB (for EvalKey transfer) diff --git a/install.sh b/install.sh index 15c8879..c0dcb95 100755 --- a/install.sh +++ b/install.sh @@ -331,6 +331,18 @@ prompt_envector_config() { prompt ENVECTOR_API_KEY "enVector API key (e.g. aBcDE_12345_xxxxx)" prompt VAULT_INDEX_NAME "Index name" "runecontext" + print_step "enVector eval mode" + local eval_selected + eval_selected=$(select_menu "mm32" "rmp") + ENVECTOR_EVAL_MODE=$([ "$eval_selected" -eq 0 ] && echo "mm32" || echo "rmp") + print_info "Eval mode: ${ENVECTOR_EVAL_MODE}" + + print_step "enVector TLS" + local tls_selected + tls_selected=$(select_menu "true (managed cloud)" "false (self-hosted, no TLS)") + ENVECTOR_TLS=$([ "$tls_selected" -eq 0 ] && echo "true" || echo "false") + print_info "enVector TLS: ${ENVECTOR_TLS}" + if [ -z "$ENVECTOR_ENDPOINT" ] || [ -z "$ENVECTOR_API_KEY" ]; then print_error "enVector endpoint and API key are required." exit 1 @@ -502,6 +514,8 @@ VAULT_TEAM_SECRET=${VAULT_TEAM_SECRET_VALUE} VAULT_INDEX_NAME=${VAULT_INDEX_NAME} ENVECTOR_ENDPOINT=${ENVECTOR_ENDPOINT} ENVECTOR_API_KEY=${ENVECTOR_API_KEY} +ENVECTOR_EVAL_MODE=${ENVECTOR_EVAL_MODE} +ENVECTOR_TLS=${ENVECTOR_TLS} EMBEDDING_DIM=1024 RUNE_VAULT_TAG=${DOCKER_TAG} ENVEOF diff --git a/scripts/install-dev.sh b/scripts/install-dev.sh index f5be62e..c72c847 100755 --- a/scripts/install-dev.sh +++ b/scripts/install-dev.sh @@ -326,6 +326,18 @@ prompt_envector_config() { prompt ENVECTOR_API_KEY "enVector API key (e.g. aBcDE_12345_xxxxx)" prompt VAULT_INDEX_NAME "Index name" "runecontext" + print_step "enVector eval mode" + local eval_selected + eval_selected=$(select_menu "mm32" "rmp") + ENVECTOR_EVAL_MODE=$([ "$eval_selected" -eq 0 ] && echo "mm32" || echo "rmp") + print_info "Eval mode: ${ENVECTOR_EVAL_MODE}" + + print_step "enVector TLS" + local tls_selected + tls_selected=$(select_menu "true (managed cloud)" "false (self-hosted, no TLS)") + ENVECTOR_TLS=$([ "$tls_selected" -eq 0 ] && echo "true" || echo "false") + print_info "enVector TLS: ${ENVECTOR_TLS}" + if [ -z "$ENVECTOR_ENDPOINT" ] || [ -z "$ENVECTOR_API_KEY" ]; then print_error "enVector endpoint and API key are required." exit 1 @@ -484,7 +496,9 @@ VAULT_TEAM_SECRET=${VAULT_TEAM_SECRET_VALUE} VAULT_INDEX_NAME=${VAULT_INDEX_NAME} ENVECTOR_ENDPOINT=${ENVECTOR_ENDPOINT} ENVECTOR_API_KEY=${ENVECTOR_API_KEY} -EMBEDDING_DIM=768 +ENVECTOR_EVAL_MODE=${ENVECTOR_EVAL_MODE} +ENVECTOR_TLS=${ENVECTOR_TLS} +EMBEDDING_DIM=1024 RUNE_VAULT_TAG=${DOCKER_TAG} ENVEOF diff --git a/tests/unit/test_eval_mode.py b/tests/unit/test_eval_mode.py new file mode 100644 index 0000000..5683830 --- /dev/null +++ b/tests/unit/test_eval_mode.py @@ -0,0 +1,136 @@ +""" +Unit tests for ENVECTOR_EVAL_MODE configuration (PR #59 test plan). + +Test plan coverage: +- Item 1: EVAL_MODE defaults to "mm32" when env var is not set +- Item 2: ev.init() is called with "mm32" when ENVECTOR_EVAL_MODE=mm32 +- Item 3: invalid EVAL_MODE propagates to SDK (no vault-level validation guard) +""" +import sys +import os +import importlib +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../vault")) + +import vault_core + + +class TestEvalModeEnvVar: + """Test plan item 1 & 2: EVAL_MODE env var reading.""" + + def test_eval_mode_reflects_env_at_load_time(self): + """EVAL_MODE at module level matches what os.getenv would return.""" + expected = os.getenv("ENVECTOR_EVAL_MODE", "mm32").lower() + assert vault_core.EVAL_MODE == expected + + def test_eval_mode_is_lowercased(self, monkeypatch): + """EVAL_MODE applies .lower() so casing doesn't matter.""" + monkeypatch.setattr(vault_core, "EVAL_MODE", "RMP") + # Simulate what the module does: os.getenv(...).lower() + assert vault_core.EVAL_MODE.lower() == "rmp" + + def test_eval_mode_default_is_mm32_when_env_unset(self, monkeypatch): + """When ENVECTOR_EVAL_MODE is not set, the formula produces 'mm32'.""" + monkeypatch.delenv("ENVECTOR_EVAL_MODE", raising=False) + result = os.getenv("ENVECTOR_EVAL_MODE", "mm32").lower() + assert result == "mm32" + + def test_eval_mode_reads_rmp_from_env(self, monkeypatch): + """When ENVECTOR_EVAL_MODE=RMP, the formula produces 'rmp'.""" + monkeypatch.setenv("ENVECTOR_EVAL_MODE", "RMP") + result = os.getenv("ENVECTOR_EVAL_MODE", "mm32").lower() + assert result == "rmp" + + +class TestEvalModePassedToEvInit: + """Test plan item 2 & 3: ev.init() receives EVAL_MODE correctly.""" + + def _setup_offline_patches(self, monkeypatch, tmp_path, *, eval_mode: str): + """Patch vault_core module vars to trigger Phase 2 (cloud init).""" + key_dir = tmp_path / "vault-key" + key_dir.mkdir() + (key_dir / "EncKey.json").write_text("{}") + + monkeypatch.setattr(vault_core, "EVAL_MODE", eval_mode) + monkeypatch.setattr(vault_core, "ENVECTOR_ENDPOINT", "grpc://test:50051") + monkeypatch.setattr(vault_core, "ENVECTOR_API_KEY", "test-key") + monkeypatch.setattr(vault_core, "EMBEDDING_DIM", 1024) + monkeypatch.setattr(vault_core, "KEY_DIR", str(tmp_path)) + monkeypatch.setattr(vault_core, "KEY_SUBDIR", str(key_dir)) + monkeypatch.setattr(vault_core, "KEY_ID", "vault-key") + monkeypatch.setattr(vault_core, "VAULT_INDEX_NAME", None) + + def _patch_pyenvector(self, mock_ev: MagicMock): + """Replace pyenvector in sys.modules with a mock.""" + original = sys.modules.get("pyenvector") + sys.modules["pyenvector"] = mock_ev + return original + + def _restore_pyenvector(self, original): + if original is not None: + sys.modules["pyenvector"] = original + else: + sys.modules.pop("pyenvector", None) + + def test_ensure_vault_passes_eval_mode_mm32_to_ev_init(self, monkeypatch, tmp_path): + """ensure_vault() calls ev.init(eval_mode='mm32') when EVAL_MODE is 'mm32'.""" + self._setup_offline_patches(monkeypatch, tmp_path, eval_mode="mm32") + + mock_ev = MagicMock() + original = self._patch_pyenvector(mock_ev) + try: + vault_core.ensure_vault() + finally: + self._restore_pyenvector(original) + + mock_ev.init.assert_called_once_with( + address="grpc://test:50051", + key_path=str(tmp_path), + key_id="vault-key", + dim=1024, + eval_mode="mm32", + auto_key_setup=True, + access_token="test-key", + query_encryption="plain", + secure=True, + ) + + def test_ensure_vault_passes_eval_mode_rmp_to_ev_init(self, monkeypatch, tmp_path): + """ensure_vault() calls ev.init(eval_mode='rmp') when EVAL_MODE is 'rmp'.""" + self._setup_offline_patches(monkeypatch, tmp_path, eval_mode="rmp") + + mock_ev = MagicMock() + original = self._patch_pyenvector(mock_ev) + try: + vault_core.ensure_vault() + finally: + self._restore_pyenvector(original) + + mock_ev.init.assert_called_once_with( + address="grpc://test:50051", + key_path=str(tmp_path), + key_id="vault-key", + dim=1024, + eval_mode="rmp", + auto_key_setup=True, + access_token="test-key", + query_encryption="plain", + secure=True, + ) + + def test_invalid_eval_mode_rejected_at_vault_level(self, monkeypatch, tmp_path): + """Invalid EVAL_MODE is caught by vault's allowlist guard before reaching the SDK.""" + self._setup_offline_patches(monkeypatch, tmp_path, eval_mode="invalid") + + mock_ev = MagicMock() + original = self._patch_pyenvector(mock_ev) + try: + with pytest.raises(ValueError, match="Invalid ENVECTOR_EVAL_MODE"): + vault_core.ensure_vault() + finally: + self._restore_pyenvector(original) + + mock_ev.init.assert_not_called() diff --git a/tests/unit/test_protovalidate.py b/tests/unit/test_protovalidate.py index 48d2e4d..3cf6cda 100644 --- a/tests/unit/test_protovalidate.py +++ b/tests/unit/test_protovalidate.py @@ -9,7 +9,7 @@ import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault')) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault/proto')) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../vault/_proto')) protovalidate = pytest.importorskip("protovalidate") pb2 = pytest.importorskip("vault_service_pb2") diff --git a/tests/unit/test_public_key.py b/tests/unit/test_public_key.py index 2b309b8..2468663 100644 --- a/tests/unit/test_public_key.py +++ b/tests/unit/test_public_key.py @@ -94,3 +94,21 @@ def test_bundle_envector_empty_when_not_configured(self, test_keys, monkeypatch) assert bundle.get("envector_endpoint") is None assert bundle.get("envector_api_key") is None + + def test_bundle_contains_envector_secure_true(self, test_keys, monkeypatch): + """Bundle should propagate ENVECTOR_TLS=true as envector_secure=True.""" + monkeypatch.setattr('vault_core.ENVECTOR_TLS', True) + + result = get_public_key("evt_0000000000000000000000000000demo") + bundle = json.loads(result) + + assert bundle["envector_secure"] is True + + def test_bundle_contains_envector_secure_false(self, test_keys, monkeypatch): + """Bundle should propagate ENVECTOR_TLS=false as envector_secure=False.""" + monkeypatch.setattr('vault_core.ENVECTOR_TLS', False) + + result = get_public_key("evt_0000000000000000000000000000demo") + bundle = json.loads(result) + + assert bundle["envector_secure"] is False diff --git a/vault/.env.example b/vault/.env.example index 42ba518..0199814 100644 --- a/vault/.env.example +++ b/vault/.env.example @@ -53,6 +53,9 @@ NGROK_AUTHTOKEN= ENVECTOR_ENDPOINT= # enVector API key (issued from envector.io dashboard) ENVECTOR_API_KEY= +# TLS for enVector connection (default: true when access_token is set). +# Set to false for self-hosted clusters without TLS. +ENVECTOR_TLS=true # ── Index Settings ────────────────────────────────────────────── # Name of the team index on enVector Cloud. @@ -60,3 +63,11 @@ ENVECTOR_API_KEY= VAULT_INDEX_NAME=runecontext # Embedding dimension (must match your embedding model) EMBEDDING_DIM=1024 + +# ── FHE Evaluation Mode ───────────────────────────────────────── +# Controls the homomorphic encryption evaluation strategy. +# rmp — Relinearization Multiplication Protocol (envector-msa <= 1.2.x) +# mm32 — Multi-Multiplication 32 (envector-msa >= 1.4.0-alpha) +# IMPORTANT: Keys are mode-specific. Changing this requires deleting vault_keys/ +# and restarting Vault so new keys are generated. +ENVECTOR_EVAL_MODE=mm32 diff --git a/vault/.gitignore b/vault/.gitignore index e1e9d07..53bfd6c 100644 --- a/vault/.gitignore +++ b/vault/.gitignore @@ -1,4 +1,4 @@ # Generated proto stubs (regenerate with: make proto-gen) -proto/*_pb2.py -proto/*_pb2_grpc.py +_proto/*_pb2.py +_proto/*_pb2_grpc.py buf/ diff --git a/vault/Dockerfile b/vault/Dockerfile index f902b7a..18493df 100644 --- a/vault/Dockerfile +++ b/vault/Dockerfile @@ -13,7 +13,7 @@ RUN pip install --no-cache-dir "grpcio-tools>=1.60.2,<=1.71.2" "protobuf>=5.29.0 WORKDIR /build COPY buf.yaml buf.lock ./ -COPY proto/vault_service.proto proto/__init__.py proto/ +COPY _proto/vault_service.proto _proto/__init__.py _proto/ COPY scripts/proto-gen.sh scripts/ RUN bash scripts/proto-gen.sh @@ -36,7 +36,7 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY vault_core.py vault_grpc_server.py token_store.py admin_server.py vault_admin_cli.py request_validator.py validation_interceptor.py audit.py ./ -COPY --from=proto-builder /build/proto/ proto/ +COPY --from=proto-builder /build/_proto/ _proto/ COPY --from=proto-builder /build/buf/ buf/ COPY docker-entrypoint.sh . RUN chmod +x docker-entrypoint.sh vault_admin_cli.py @@ -44,7 +44,7 @@ RUN chmod +x docker-entrypoint.sh vault_admin_cli.py RUN mkdir -p /app/vault_keys /app/certs /app/config /secure/backups /var/log/rune-vault \ && chown -R vault:vault /app /secure /var/log/rune-vault -ENV PYTHONPATH=/app/proto:/app +ENV PYTHONPATH=/app/_proto:/app EXPOSE 50051 diff --git a/vault/proto/__init__.py b/vault/_proto/__init__.py similarity index 100% rename from vault/proto/__init__.py rename to vault/_proto/__init__.py diff --git a/vault/proto/vault_service.proto b/vault/_proto/vault_service.proto similarity index 100% rename from vault/proto/vault_service.proto rename to vault/_proto/vault_service.proto diff --git a/vault/buf.lock b/vault/buf.lock index d15a117..709ae02 100644 --- a/vault/buf.lock +++ b/vault/buf.lock @@ -2,5 +2,5 @@ version: v2 deps: - name: buf.build/bufbuild/protovalidate - commit: 80ab13bee0bf4272b6161a72bf7034e0 - digest: b5:1aa6a965be5d02d64e1d81954fa2e78ef9d1e33a0c30f92bc2626039006a94deb3a5b05f14ed8893f5c3ffce444ac008f7e968188ad225c4c29c813aa5f2daa1 + commit: 50325440f8f24053b047484a6bf60b76 + digest: b5:74cb6f5c0853c3c10aafc701614194bbd63326bdb8ef4068214454b8894b03ba4113e04b3a33a8321cdf05336e37db4dc14a5e2495db8462566914f36086ba31 diff --git a/vault/buf.yaml b/vault/buf.yaml index 87c1dfe..75077dc 100644 --- a/vault/buf.yaml +++ b/vault/buf.yaml @@ -1,5 +1,5 @@ version: v2 modules: - - path: proto + - path: _proto deps: - buf.build/bufbuild/protovalidate diff --git a/vault/docker-compose.yml b/vault/docker-compose.yml index 1b5d598..774ab64 100644 --- a/vault/docker-compose.yml +++ b/vault/docker-compose.yml @@ -25,6 +25,8 @@ services: - ENVECTOR_ENDPOINT=${ENVECTOR_ENDPOINT:-} - ENVECTOR_API_KEY=${ENVECTOR_API_KEY:-} - EMBEDDING_DIM=${EMBEDDING_DIM:-1024} + - ENVECTOR_EVAL_MODE=${ENVECTOR_EVAL_MODE:-mm32} + - ENVECTOR_TLS=${ENVECTOR_TLS:-true} - VAULT_AUDIT_LOG=${VAULT_AUDIT_LOG:-file} networks: @@ -47,13 +49,12 @@ services: deploy: resources: limits: - memory: 1G - cpus: "1.0" + memory: 10G + cpus: "2.0" reservations: memory: 512M cpus: "0.5" - networks: vault-net: driver: bridge diff --git a/vault/scripts/proto-gen.sh b/vault/scripts/proto-gen.sh index 084cc13..6c08a61 100755 --- a/vault/scripts/proto-gen.sh +++ b/vault/scripts/proto-gen.sh @@ -31,12 +31,12 @@ print(os.path.join(os.path.dirname(grpc_tools.__file__), '_proto')) # ── 4. Generate vault_service stubs ───────────────────────────────── echo "[proto-gen] Generating vault_service_pb2.py / vault_service_pb2_grpc.py..." python3 -m grpc_tools.protoc \ - -Iproto \ + -I_proto \ -I"$DEPS_DIR" \ -I"$GRPC_PROTO" \ - --python_out=proto \ - --grpc_python_out=proto \ - proto/vault_service.proto + --python_out=_proto \ + --grpc_python_out=_proto \ + _proto/vault_service.proto # ── 5. Generate protovalidate runtime stubs ────────────────────────── echo "[proto-gen] Generating buf/validate/validate_pb2.py..." diff --git a/vault/vault_core.py b/vault/vault_core.py index 3556abc..d532526 100644 --- a/vault/vault_core.py +++ b/vault/vault_core.py @@ -11,6 +11,7 @@ import json import logging import os +import shutil from cryptography.hazmat.primitives import hashes from cryptography.hazmat.primitives.kdf.hkdf import HKDF @@ -37,6 +38,9 @@ ENVECTOR_ENDPOINT = os.getenv("ENVECTOR_ENDPOINT", "").strip() or None ENVECTOR_API_KEY = os.getenv("ENVECTOR_API_KEY", "").strip() or None EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", "1024")) +_VALID_EVAL_MODES = {"rmp", "mm32"} +EVAL_MODE = os.getenv("ENVECTOR_EVAL_MODE", "mm32").lower() +ENVECTOR_TLS = os.getenv("ENVECTOR_TLS", "").strip().lower() not in ("false", "0", "no") # Team index name (set by admin, distributed to all team members via get_public_key) VAULT_INDEX_NAME = os.getenv("VAULT_INDEX_NAME", "").strip() or None @@ -50,15 +54,32 @@ def ensure_vault(): (SDK handles key registration → loading) 3. Create the team index if it doesn't exist """ + if EVAL_MODE not in _VALID_EVAL_MODES: + raise ValueError( + f"Invalid ENVECTOR_EVAL_MODE={EVAL_MODE!r}. " + f"Must be one of: {sorted(_VALID_EVAL_MODES)}. " + "If you changed this value, delete vault_keys/ and restart Vault." + ) + import pyenvector as ev # Phase 1: local key generation enc_key = os.path.join(KEY_SUBDIR, "EncKey.json") if not os.path.exists(enc_key): + if os.path.exists(KEY_SUBDIR) and any(os.scandir(KEY_SUBDIR)): + logger.warning( + f"Incomplete key state found in {KEY_SUBDIR} (EncKey.json missing). " + "Cleaning up and regenerating..." + ) + shutil.rmtree(KEY_SUBDIR) logger.info(f"Generating keys in {KEY_SUBDIR}...") os.makedirs(KEY_SUBDIR, exist_ok=True) keygen = KeyGenerator( - key_path=KEY_SUBDIR, key_id=KEY_ID, dim_list=[DIM], metadata_encryption=False + key_path=KEY_SUBDIR, + key_id=KEY_ID, + dim_list=[DIM], + metadata_encryption=False, + eval_mode=EVAL_MODE, ) keygen.generate_keys() else: @@ -76,10 +97,11 @@ def ensure_vault(): key_path=KEY_DIR, key_id=KEY_ID, dim=EMBEDDING_DIM, - eval_mode="rmp", + eval_mode=EVAL_MODE, auto_key_setup=True, access_token=ENVECTOR_API_KEY, query_encryption="plain", + secure=ENVECTOR_TLS, ) logger.info("Key registered on enVector Cloud (auto_key_setup).") except Exception as e: @@ -90,10 +112,11 @@ def ensure_vault(): key_path=KEY_DIR, key_id=KEY_ID, dim=EMBEDDING_DIM, - eval_mode="rmp", + eval_mode=EVAL_MODE, auto_key_setup=False, access_token=ENVECTOR_API_KEY, query_encryption="plain", + secure=ENVECTOR_TLS, ) logger.info("Connected to enVector Cloud (auto_key_setup=False).") @@ -115,7 +138,7 @@ def ensure_vault(): ev.create_index( index_name=VAULT_INDEX_NAME, dim=EMBEDDING_DIM, - index_params={"index_type": "FLAT"}, + index_params={"index_type": "IVF_VCT", "nlist": 8192, "default_nprobe": 6}, query_encryption="plain", metadata_encryption=False, # workaround: skip deepcopy metadata_key property access (pyenvector#247) @@ -229,6 +252,7 @@ def _get_public_key_impl(token: str) -> str: # enVector Cloud credentials — agents receive these from Vault instead of user input bundle["envector_endpoint"] = ENVECTOR_ENDPOINT bundle["envector_api_key"] = ENVECTOR_API_KEY + bundle["envector_secure"] = ENVECTOR_TLS return json.dumps(bundle) diff --git a/vault/vault_grpc_server.py b/vault/vault_grpc_server.py index eb7df7b..eaf9499 100644 --- a/vault/vault_grpc_server.py +++ b/vault/vault_grpc_server.py @@ -14,12 +14,12 @@ from datetime import datetime, timezone import grpc +import vault_service_pb2 as pb2 +import vault_service_pb2_grpc as pb2_grpc from admin_server import start_admin_server from grpc_health.v1 import health_pb2, health_pb2_grpc from grpc_health.v1.health import HealthServicer from grpc_reflection.v1alpha import reflection -from proto import vault_service_pb2 as pb2 -from proto import vault_service_pb2_grpc as pb2_grpc from token_store import ( RateLimitError, ScopeError, @@ -44,7 +44,9 @@ logger = logging.getLogger("rune.vault.grpc") -MAX_MESSAGE_LENGTH = 256 * 1024 * 1024 # 256 MB (EvalKey can be tens of MB) +MAX_MESSAGE_LENGTH = ( + 2000 * 1024 * 1024 +) # ~1.95 GB (kept under INT32_MAX; EvalKey in pyenvector >= 1.4.0 reaches ~1.2 GB) def _emit_audit(method, user, top_k, result_count, status, error_detail, duration, context):