CryptoLabInc · inkme9 · Nov 17, 2025 · Oct 12, 2025 · Oct 12, 2025 · Oct 12, 2025
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -0,0 +1,35 @@
+name: PR Checks
+
+on:
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: pr-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  checks:
+    runs-on: [self-hosted, linux, no-gpu]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install -e .
+          python -m pip install pytest pre-commit
+
+      - name: Lint and format
+        run: pre-commit run --all-files --show-diff-on-failure
+
+      - name: Run unit tests
+        run: python -m pytest -q -m "not integration"
diff --git a/.gitignore b/.gitignore
@@ -36,6 +36,16 @@ Thumbs.db
 
 # Sensitive local data
 keys/
+VECTORSTORE.md
+
+# External symlinks (local workspace references)
+es2-msa
+es2-msa/
+es2-deploy
+es2-deploy/
+
+# Local helper scripts
+run_unit_tests.py
 
 # Jupyter
 .ipynb_checkpoints/

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.7.1
+    hooks:
+      - id: ruff
+  - repo: https://github.com/psf/black
+    rev: 24.10.0
+    hooks:
+      - id: black
+        language_version: python3.11
diff --git a/README.md b/README.md
@@ -13,12 +13,12 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
   - `python3.11 -m venv .venv && source .venv/bin/activate`
 - Install runtime dependencies:
   - `pip install -U pip setuptools wheel`
-  - `pip install es2==1.1.0 langchain sentence-transformers`
+  - `pip install es2 langchain sentence-transformers`
 
 ## Usage Overview
 1. Configure Envector using `EnvectorConfig`, pointing to your ES2 endpoint and keys.
 2. Initialize embeddings (or provide pre-computed vectors).
-3. Instantiate `Envector(config=cfg, embeddings=emb)` and call `add_texts` or `as_retriever`.
+3. Instantiate `Envector(config=cfg, embeddings=emb)` and call `add_texts`, `add_documents`, or use `as_retriever`.
 4. Run `similarity_search` or plug the retriever into your LangChain pipeline.
 
 > See `notebooks/` for end-to-end walkthroughs and the `libs/envector` package for implementation details.
@@ -27,7 +27,7 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
 Key dataclasses live in `libs/envector/config.py`:
 - `ConnectionConfig`: address or host/port for ES2.
 - `KeyConfig`: key path, key ID, optional preset/eval mode.
-- `IndexSettings`: index name, dimension (16–4096), query encryption mode, optional output fields and fetch parameters.
+- `IndexSettings`: index name, dimension (32–4096), query encryption mode, optional output fields and fetch parameters.
 - `EnvectorConfig`: wraps the above and enables auto-creation via `create_if_missing`.
 
 ## Data Model
@@ -41,10 +41,66 @@ Key dataclasses live in `libs/envector/config.py`:
 - Manual item IDs are not accepted; returned IDs from `add_texts` are ephemeral.
 - Filtering happens client-side; ensure metadata is JSON for structured filters.
 
+## Examples
+- Configuration:
+  ```python
+  from langchain_envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+
+  cfg = EnvectorConfig(
+      connection=ConnectionConfig(
+        address=ES2_ADDRESS, 
+        access_token=ES2_ACCESS_TOKEN
+      ),
+      key=KeyConfig(
+        key_path=ES2_KEY_PATH, 
+        key_id=ES2_KEY_ID, 
+        preset="ip", 
+        eval_mode="rmp"
+      ),
+      index=IndexSettings(
+        index_name=INDEX_NAME, 
+        dim=vector_dim, 
+        query_encryption="cipher"
+      ),
+      create_if_missing=True,
+  )
+  ```
+
+- Add documents (from LangChain Documents):
+
+  ```python
+  from langchain_core.documents import Document
+  from langchain_envector.vectorstore import Envector
+
+  docs = [
+    Document(
+      page_content="chunk-1", 
+      metadata={"source": "paper.pdf", "page": 1, "chunk": 0}
+    ),
+    Document(
+      page_content="chunk-2", 
+      metadata={"source": "paper.pdf", "page": 1, "chunk": 1}
+    ),
+  ]
+
+  store = Envector(config=cfg, embeddings=emb)
+  store.add_documents(docs)
+  ```
+
 ## Troubleshooting
 - Connection issues: verify ES2 address and registered keys.
 - Embeddings mismatch: ensure embedding dimension equals `index.dim` when supplying vectors.
 - Unexpected raw strings: confirm inserts used the JSON envelope.
+- Key issues: if you run into a key-related error, check that the local key's metadata matches the key registered on the server.
+
+## Testing Without ES2
+- Run unit tests offline (no ES2 or SDK required):
+  - `python -m pytest -q -m "not integration"`
+  - or `python scripts/run_unit_tests.py`
+- Run integration tests (requires server and keys):
+  - Export `ES2_ADDRESS`, `ES2_KEY_PATH`, `ES2_KEY_ID`
+  - Optional: `ES2_USE_EMBEDDINGS=1`, `ES2_EMB_MODEL`, `ES2_USE_HF_DATASET=1`
+  - `python -m pytest -q -m integration -s`
 
 ## Contributing
 See [`CONTRIBUTE.md`](CONTRIBUTE.md) for development, testing, and PR guidelines.
diff --git a/libs/envector/examples/basic_usage.py b/libs/envector/examples/basic_usage.py
@@ -8,15 +8,22 @@
 
 from __future__ import annotations
 
-from libs.envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from libs.envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from libs.envector.vectorstore import Envector
 
 
 def main():
     # Replace with your actual settings
     cfg = EnvectorConfig(
         connection=ConnectionConfig(address="localhost:50050"),
-        key=KeyConfig(key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"),
+        key=KeyConfig(
+            key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"
+        ),
         index=IndexSettings(index_name="demo", dim=384, query_encryption="plain"),
         create_if_missing=True,
     )
@@ -43,4 +50,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/libs/envector/examples/cipher_query.py b/libs/envector/examples/cipher_query.py
@@ -6,15 +6,24 @@
 
 from __future__ import annotations
 
-from libs.envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from libs.envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from libs.envector.vectorstore import Envector
 
 
 def main():
     cfg = EnvectorConfig(
         connection=ConnectionConfig(address="localhost:50050"),
-        key=KeyConfig(key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"),
-        index=IndexSettings(index_name="demo_cipher", dim=384, query_encryption="cipher"),
+        key=KeyConfig(
+            key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"
+        ),
+        index=IndexSettings(
+            index_name="demo_cipher", dim=384, query_encryption="cipher"
+        ),
         create_if_missing=True,
     )
 
@@ -38,4 +47,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/libs/envector/examples/ingest_synthetic_1k.py b/libs/envector/examples/ingest_synthetic_1k.py
@@ -19,7 +19,12 @@
 from pathlib import Path
 from typing import List
 
-from libs.envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from libs.envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from libs.envector.vectorstore import Envector
 
 
@@ -34,7 +39,12 @@ def main():
     ap.add_argument("--key-path", required=True)
     ap.add_argument("--key-id", required=True)
     ap.add_argument("--index-name", required=True)
-    ap.add_argument("--dim", type=int, required=False, help="If omitted and --use-embeddings, infer from model.")
+    ap.add_argument(
+        "--dim",
+        type=int,
+        required=False,
+        help="If omitted and --use-embeddings, infer from model.",
+    )
     ap.add_argument("--dataset", default="data/synthetic_rag_1k.jsonl")
     ap.add_argument("--use-embeddings", action="store_true")
     ap.add_argument("--model", default="sentence-transformers/all-MiniLM-L6-v2")
@@ -52,7 +62,9 @@ def main():
 
     cfg = EnvectorConfig(
         connection=ConnectionConfig(address=args.address),
-        key=KeyConfig(key_path=args.key_path, key_id=args.key_id, preset="ip", eval_mode="rmp"),
+        key=KeyConfig(
+            key_path=args.key_path, key_id=args.key_id, preset="ip", eval_mode="rmp"
+        ),
         index=IndexSettings(
             index_name=args.index_name,
             dim=(args.dim if args.dim is not None else inferred_dim or 0),
@@ -76,7 +88,9 @@ def main():
         if embeddings is None:
             # Without embeddings, require manual vectors; here we simply skip.
             # Users should provide --use-embeddings or adapt to their vector source.
-            raise ValueError("--use-embeddings is required unless you provide vectors explicitly.")
+            raise ValueError(
+                "--use-embeddings is required unless you provide vectors explicitly."
+            )
         store.add_texts(t_batch, metadatas=m_batch)
 
     print(f"Inserted {len(texts)} documents into index '{args.index_name}'")

diff --git a/libs/envector/langchain_envector/__init__.py b/libs/envector/langchain_envector/__init__.py
@@ -7,5 +7,10 @@
 from .vectorstore import Envector
 from .config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
 
-__all__ = ["Envector", "ConnectionConfig", "EnvectorConfig", "IndexSettings", "KeyConfig"]
-
+__all__ = [
+    "Envector",
+    "ConnectionConfig",
+    "EnvectorConfig",
+    "IndexSettings",
+    "KeyConfig",
+]
diff --git a/libs/envector/langchain_envector/client.py b/libs/envector/langchain_envector/client.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-from typing import Optional
-
 from .config import EnvectorConfig
 
 
@@ -34,7 +32,9 @@ def init(self):
         else:
             if not (c.host and c.port):
                 raise ValueError("Either address or host+port must be provided.")
-            es2_client.init_connect(host=c.host, port=c.port, access_token=c.access_token)
+            es2_client.init_connect(
+                host=c.host, port=c.port, access_token=c.access_token
+            )
 
         # Key path baseline for Index
         from es2.index import Index as _Index
@@ -79,4 +79,3 @@ def es2(self):
         if self._es2 is None:
             raise RuntimeError("Client not initialized. Call init().")
         return self._es2
-
diff --git a/libs/envector/langchain_envector/config.py b/libs/envector/langchain_envector/config.py
@@ -39,4 +39,3 @@ class EnvectorConfig:
     key: KeyConfig
     index: IndexSettings
     create_if_missing: bool = True
-
diff --git a/libs/envector/langchain_envector/retriever.py b/libs/envector/langchain_envector/retriever.py
@@ -12,7 +12,9 @@
 
 
 class EnvectorRetriever:
-    def __init__(self, store: Envector, *, search_kwargs: Optional[Dict[str, Any]] = None) -> None:
+    def __init__(
+        self, store: Envector, *, search_kwargs: Optional[Dict[str, Any]] = None
+    ) -> None:
         self.store = store
         self.search_kwargs = search_kwargs or {}
 

diff --git a/libs/envector/langchain_envector/types.py b/libs/envector/langchain_envector/types.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple, Union, overload
+from typing import Any, Callable, Dict, List, Optional, Protocol
 
 
 class Embeddings(Protocol):
@@ -10,10 +10,14 @@ class Embeddings(Protocol):
     LangChain-compatible embeddings typically implement these two methods.
     """
 
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:  # pragma: no cover - interface only
+    def embed_documents(
+        self, texts: List[str]
+    ) -> List[List[float]]:  # pragma: no cover - interface only
         ...
 
-    def embed_query(self, text: str) -> List[float]:  # pragma: no cover - interface only
+    def embed_query(
+        self, text: str
+    ) -> List[float]:  # pragma: no cover - interface only
         ...
 
 
@@ -53,8 +57,6 @@ def unpack_metadata(raw: Any) -> Dict[str, Any]:
     if isinstance(raw, dict):
         return raw
 
-    print("slafjklshglkhslafhlksadjlghsal;hf")
-
     # Some responses wrap the payload in a single-element list.
     if isinstance(raw, (list, tuple)):
         if len(raw) == 1:
@@ -94,8 +96,13 @@ def unpack_metadata(raw: Any) -> Dict[str, Any]:
 
 # --- Embeddings adaptation helpers -----------------------------------------------------
 
+
 class _CallableEmbeddings:
-    def __init__(self, docs_fn: Callable[[List[str]], List[List[float]]], query_fn: Callable[[str], List[float]]):
+    def __init__(
+        self,
+        docs_fn: Callable[[List[str]], List[List[float]]],
+        query_fn: Callable[[str], List[float]],
+    ):
         self._docs_fn = docs_fn
         self._query_fn = query_fn
 
@@ -132,7 +139,12 @@ def query_fn(text: str) -> List[float]:
         return _CallableEmbeddings(docs_fn, query_fn)
 
     # Case 3: Tuple of callables
-    if isinstance(emb, tuple) and len(emb) == 2 and callable(emb[0]) and callable(emb[1]):
+    if (
+        isinstance(emb, tuple)
+        and len(emb) == 2
+        and callable(emb[0])
+        and callable(emb[1])
+    ):
         docs_fn, query_fn = emb  # type: ignore[assignment]
         return _CallableEmbeddings(docs_fn, query_fn)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -39,4 +39,3 @@ class EnvectorConfig:
		key: KeyConfig
		index: IndexSettings
		create_if_missing: bool = True