From c4418ecc8d6b8f03e995ee28962b956e2a10499a Mon Sep 17 00:00:00 2001
From: inkme <inkme@cryptolab.co.kr>
Date: Sun, 12 Oct 2025 22:13:34 +0900
Subject: [PATCH 01/13] ES2-975: Implement add_documents in VectorStore, add
 unit tests, offline test runner, and doc updates (README, VECTORSTORE.md)

---
 README.md                                     | 22 ++++-
 VECTORSTORE.md                                | 74 +++++++++++++++
 es2-msa                                       |  1 +
 libs/envector/langchain_envector/client.py    |  1 -
 .../langchain_envector/vectorstore.py         | 22 +++++
 run_unit_tests.py                             | 23 +++++
 tests/test_vectorstore.py                     | 93 ++++++++++++++++++-
 7 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 VECTORSTORE.md
 create mode 120000 es2-msa
 create mode 100644 run_unit_tests.py

diff --git a/README.md b/README.md
index 5662696..7a231ae 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
 ## Usage Overview
 1. Configure Envector using `EnvectorConfig`, pointing to your ES2 endpoint and keys.
 2. Initialize embeddings (or provide pre-computed vectors).
-3. Instantiate `Envector(config=cfg, embeddings=emb)` and call `add_texts` or `as_retriever`.
+3. Instantiate `Envector(config=cfg, embeddings=emb)` and call `add_texts`, `add_documents`, or use `as_retriever`.
 4. Run `similarity_search` or plug the retriever into your LangChain pipeline.
 
 > See `notebooks/` for end-to-end walkthroughs and the `libs/envector` package for implementation details.
@@ -41,10 +41,30 @@ Key dataclasses live in `libs/envector/config.py`:
 - Manual item IDs are not accepted; returned IDs from `add_texts` are ephemeral.
 - Filtering happens client-side; ensure metadata is JSON for structured filters.
 
+## Examples
+Add documents (from LangChain `Document` objects):
+```python
+from langchain_core.documents import Document
+docs = [
+    Document(page_content="chunk-1", metadata={"source": "paper.pdf", "page": 1, "chunk": 0}),
+    Document(page_content="chunk-2", metadata={"source": "paper.pdf", "page": 1, "chunk": 1}),
+]
+store = Envector(config=cfg, embeddings=emb)
+store.add_documents(docs)
+```
 ## Troubleshooting
 - Connection issues: verify ES2 address and registered keys.
 - Embeddings mismatch: ensure embedding dimension equals `index.dim` when supplying vectors.
 - Unexpected raw strings: confirm inserts used the JSON envelope.
 
+## Testing Without ES2
+- Run unit tests offline (no ES2 or SDK required):
+  - `python -m pytest -q -m "not integration"`
+  - or `python run_unit_tests.py`
+- Run integration tests (requires server and keys):
+  - Export `ES2_ADDRESS`, `ES2_KEY_PATH`, `ES2_KEY_ID`
+  - Optional: `ES2_USE_EMBEDDINGS=1`, `ES2_EMB_MODEL`, `ES2_USE_HF_DATASET=1`
+  - `python -m pytest -q -m integration -s`
+
 ## Contributing
 See [`CONTRIBUTE.md`](CONTRIBUTE.md) for development, testing, and PR guidelines.
diff --git a/VECTORSTORE.md b/VECTORSTORE.md
new file mode 100644
index 0000000..7bcfd3b
--- /dev/null
+++ b/VECTORSTORE.md
@@ -0,0 +1,74 @@
+# LangChain VectorStore API 지원 현황
+
+이 문서는 LangChain의 VectorStore 인터페이스와 현재 envector에서 지원하는 기능들을 비교 분석한 결과입니다.
+
+## API 지원 현황 테이블
+
+| 메서드 | 설명 | 현재 상태 | 비고 |
+|--------|------|-----------|------|
+| **문서 추가/관리** |
+| `add_documents(documents)` | Document 객체로 문서 추가 | 🔧 구현 가능 | `add_texts` 래핑으로 구현 가능 |
+| `add_texts(texts, metadatas, ids)` | 텍스트로 직접 추가 | ✅ 구현됨 | 완전 지원 |
+| `add_documents(documents)` | 문서 추가 | ✅ 지원 | `add_texts` 위임, 임베딩/벡터 경로 지원 |
+| `upsert_documents(documents)` | 문서 추가/업데이트 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
+| `upsert_texts(texts, metadatas, ids)` | 텍스트 추가/업데이트 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
+| **문서 삭제** |
+| `delete(ids)` | ID로 문서 삭제 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
+| `delete_documents(documents)` | Document 객체로 삭제 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
+| **검색** |
+| `similarity_search(query, k, filter)` | 유사도 검색 | ✅ 구현됨 | 완전 지원 |
+| `similarity_search_with_score(query, k, filter)` | 점수와 함께 유사도 검색 | 🔧 구현 가능 | `_score`를 메타데이터로 제공 중 |
+| `similarity_search_by_vector(embedding, k, filter)` | 벡터로 직접 검색 | ✅ 구현됨 | 완전 지원 |
+| `similarity_search_with_score_by_vector(embedding, k, filter)` | 벡터로 점수와 함께 검색 | 🔧 구현 가능 | `_score`를 메타데이터로 제공 중 |
+| **팩토리 메서드** |
+| `from_texts(texts, embedding, metadatas)` | 텍스트로부터 생성 | ✅ 구현됨 | 완전 지원 |
+| `from_documents(documents, embedding)` | Document로부터 생성 | ✅ 구현됨 | 완전 지원 |
+| **기타** |
+| `as_retriever(**kwargs)` | VectorStoreRetriever로 변환 | ✅ 구현됨 | 완전 지원 |
+
+### 범례
+- ✅ **구현됨**: 현재 완전히 구현되어 사용 가능
+- 🔧 **구현 가능**: 현재 구현되지 않았지만 기술적으로 구현 가능
+- ❌ **구현 불가**: ES2 SDK 제한으로 인해 구현 불가능
+
+## 지원 현황 요약
+
+### ✅ 구현됨 (6개)
+- `add_texts` - 텍스트 추가
+- `similarity_search` - 유사도 검색
+- `similarity_search_by_vector` - 벡터 검색
+- `from_texts` - 팩토리 메서드
+- `from_documents` - 팩토리 메서드
+- `as_retriever` - 리트리버 변환
+
+### 🔧 구현 가능 (3개)
+- `add_documents` - Document 객체 추가 (래핑으로 구현 가능)
+- `similarity_search_with_score` - 점수와 함께 검색 (현재 `_score` 메타데이터로 제공)
+- `similarity_search_with_score_by_vector` - 벡터로 점수와 함께 검색 (현재 `_score` 메타데이터로 제공)
+
+### ❌ 구현 불가 (4개)
+- `add_documents` - Document 리스트 삽입 (지원)
+- `upsert_documents` - 문서 업서트 (ES2 SDK 제한)
+- `upsert_texts` - 텍스트 업서트 (ES2 SDK 제한)
+- `delete` - ID로 삭제 (ES2 SDK 제한)
+- `delete_documents` - Document 삭제 (ES2 SDK 제한)
+
+## 주요 제한사항
+
+1. **개별 문서 삭제/업데이트 불가**: envector는 개별 문서의 삭제나 업데이트를 지원하지 않습니다. 전체 인덱스를 삭제해야 합니다.
+2. **IDs 무시**: `add_documents`/`add_texts`에서 사용자 제공 ID는 무시됩니다. 반환값은 서버 영속 ID가 아닌 일시적 식별자입니다.
+
+2. **upsert 기능 없음**: 문서의 추가/업데이트를 한 번에 처리하는 upsert 기능이 없습니다.
+
+3. **점수 반환 방식**: `similarity_search_with_score` 메서드는 없지만, `similarity_search`에서 `_score`를 메타데이터로 제공합니다.
+
+## 사용 권장사항
+
+- **문서 추가**: `add_texts` 메서드 사용
+- **검색**: `similarity_search` 또는 `similarity_search_by_vector` 사용
+- **점수 확인**: 검색 결과의 `metadata['_score']`에서 점수 확인
+- **RAG 파이프라인**: `as_retriever()`를 사용하여 LangChain의 RAG 워크플로우에 통합
+
+## 호환성
+
+envector는 LangChain의 핵심 VectorStore 기능을 지원하여 기본적인 RAG(Retrieval-Augmented Generation) 워크플로우를 구현하는 데 충분합니다. 다만 개별 문서 관리가 필요한 경우에는 다른 VectorStore 구현체를 고려해야 합니다.
diff --git a/es2-msa b/es2-msa
new file mode 120000
index 0000000..44b78b8
--- /dev/null
+++ b/es2-msa
@@ -0,0 +1 @@
+/Users/inkme/git/es2-msa
\ No newline at end of file
diff --git a/libs/envector/langchain_envector/client.py b/libs/envector/langchain_envector/client.py
index c8b2952..6329c27 100644
--- a/libs/envector/langchain_envector/client.py
+++ b/libs/envector/langchain_envector/client.py
@@ -79,4 +79,3 @@ def es2(self):
         if self._es2 is None:
             raise RuntimeError("Client not initialized. Call init().")
         return self._es2
-
diff --git a/libs/envector/langchain_envector/vectorstore.py b/libs/envector/langchain_envector/vectorstore.py
index b56d0a0..0011deb 100644
--- a/libs/envector/langchain_envector/vectorstore.py
+++ b/libs/envector/langchain_envector/vectorstore.py
@@ -178,6 +178,28 @@ def similarity_search_by_vector(
     # -------------------------------
     # Class constructors (LangChain compatibility)
     # -------------------------------
+    def add_documents(
+        self,
+        documents: List[Document],
+        ids: Optional[List[str]] = None,
+        *,
+        vectors: Optional[List[List[float]]] = None,
+        **kwargs: Any,
+    ) -> List[int]:
+        """Insert Documents via `add_texts` (mirrors LangChain's VectorStore API).
+
+        Reads `page_content` (default "") and `metadata` (default {}) from
+        each Document; `ids`, `vectors`, and extra kwargs are forwarded as-is.
+
+        Notes:
+        - Manual `ids` are ignored (ES2 does not support user-provided IDs).
+        - When `embeddings` is not configured, you must supply `vectors`.
+        - Returns whatever `add_texts` returns (ephemeral IDs, not persisted).
+        """
+        texts = [getattr(d, "page_content", "") for d in documents]
+        metadatas = [getattr(d, "metadata", {}) for d in documents]
+        return self.add_texts(texts=texts, metadatas=metadatas, ids=ids, vectors=vectors, **kwargs)
+
     @classmethod
     def from_texts(
         cls,
diff --git a/run_unit_tests.py b/run_unit_tests.py
new file mode 100644
index 0000000..35665c7
--- /dev/null
+++ b/run_unit_tests.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+"""Run unit tests without requiring a running Envector (ES2) server.
+
+This script runs pytest while excluding tests marked as `integration`.
+It is safe to use in environments without the es2 SDK or server.
+"""
+
+import sys
+import subprocess
+
+
+def main() -> int:
+    cmd = [sys.executable, "-m", "pytest", "-q", "-m", "not integration"]
+    try:
+        return subprocess.call(cmd)
+    except FileNotFoundError:
+        print("pytest not found. Install with: python -m pip install pytest", file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
+
diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py
index a1bb8c7..50c4920 100644
--- a/tests/test_vectorstore.py
+++ b/tests/test_vectorstore.py
@@ -3,7 +3,7 @@
 import re
 
 from langchain_envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
-from langchain_envector.vectorstore import Envector
+from langchain_envector.vectorstore import Envector, Document as LC_Document
 
 from .conftest import FakeClient, FakeEmbeddings, FakeIndex
 
@@ -96,3 +96,94 @@ def test_similarity_search_handles_python_literal_metadata():
 
 
     # dict-type metadata is not supported currently; only text-based
+
+
+def test_similarity_search_by_vector_with_filter_and_threshold():
+    index = FakeIndex()
+    index.search_payload = [[
+        {"id": "v-0", "score": 0.88, "metadata": "{\"text\": \"Keep\", \"metadata\": {\"k\": 1}}"},
+        {"id": "v-1", "score": 0.30, "metadata": "{\"text\": \"Drop\", \"metadata\": {\"k\": 2}}"},
+    ]]
+    client = FakeClient(index)
+    store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
+
+    # Explicit vector search (bypasses embed_query), with filter/threshold
+    docs = store.similarity_search_by_vector([0.0, 0.0, 0.0, 0.0], k=5, filter={"k": 1}, score_threshold=0.5)
+    assert len(docs) == 1
+    assert docs[0].page_content == "Keep"
+    assert docs[0].metadata["_score"] >= 0.5
+
+
+def test_from_texts_inserts_using_embeddings():
+    client = FakeClient()
+    store = Envector.from_texts(
+        ["A", "B"],
+        metadatas=[{"m": "a"}, {"m": "b"}],
+        embeddings=FakeEmbeddings(dim=4),
+        config=_cfg(),
+        client=client,
+    )
+    assert isinstance(store, Envector)
+    # One batch inserted
+    assert len(client.index.inserted) == 1
+    # Two items packed
+    assert len(client.index.inserted[0]["metadata"]) == 2
+
+
+def test_from_documents_paths_through_to_texts():
+    client = FakeClient()
+    docs = [
+        LC_Document(page_content="X", metadata={"a": 1}),
+        LC_Document(page_content="Y", metadata={"a": 2}),
+    ]
+    store = Envector.from_documents(docs, embeddings=FakeEmbeddings(dim=4), config=_cfg(), client=client)
+    assert isinstance(store, Envector)
+    assert len(client.index.inserted) == 1
+    packed = client.index.inserted[0]["metadata"]
+    # Texts preserved
+    assert any("\"text\": \"X\"" in m for m in packed)
+    assert any("\"text\": \"Y\"" in m for m in packed)
+
+
+def test_add_documents_with_embeddings():
+    client = FakeClient()
+    store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
+
+    docs = [
+        LC_Document(page_content="C1", metadata={"s": 1}),
+        LC_Document(page_content="C2", metadata={"s": 2}),
+    ]
+    ret = store.add_documents(docs)
+    assert len(ret) == 2
+    assert len(client.index.inserted) == 1
+    packed = client.index.inserted[0]["metadata"]
+    assert any("\"text\": \"C1\"" in m for m in packed)
+    assert any("\"text\": \"C2\"" in m for m in packed)
+
+
+def test_add_documents_requires_vectors_when_no_embeddings():
+    client = FakeClient()
+    store = Envector(config=_cfg(), embeddings=None, client=client)
+    docs = [LC_Document(page_content="C", metadata={})]
+    try:
+        store.add_documents(docs)
+        assert False, "Expected ValueError when embeddings is None and no vectors provided"
+    except ValueError as e:
+        assert "embeddings is None and vectors not provided" in str(e)
+
+
+def test_add_documents_with_explicit_vectors():
+    client = FakeClient()
+    store = Envector(config=_cfg(), embeddings=None, client=client)
+
+    docs = [
+        LC_Document(page_content="V1", metadata={"k": "a"}),
+        LC_Document(page_content="V2", metadata={"k": "b"}),
+    ]
+    vecs = [
+        [1.0, 0.0, 0.0, 0.0],
+        [0.0, 1.0, 0.0, 0.0],
+    ]
+    ret = store.add_documents(docs, vectors=vecs)
+    assert len(ret) == 2
+    assert len(client.index.inserted) == 1

From afd1c1f3a6bff0c9c293aa4306206d7cc7682c14 Mon Sep 17 00:00:00 2001
From: inkme <inkme@example.com>
Date: Sun, 12 Oct 2025 13:39:42 +0000
Subject: [PATCH 02/13] chore: ignore VECTORSTORE.md

---
 .gitignore        | 10 +++++++
 VECTORSTORE.md    | 74 -----------------------------------------------
 es2-msa           |  1 -
 run_unit_tests.py | 23 ---------------
 4 files changed, 10 insertions(+), 98 deletions(-)
 delete mode 100644 VECTORSTORE.md
 delete mode 120000 es2-msa
 delete mode 100644 run_unit_tests.py

diff --git a/.gitignore b/.gitignore
index 8a933d1..eca98cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,16 @@ Thumbs.db
 
 # Sensitive local data
 keys/
+VECTORSTORE.md
+
+# External symlinks (local workspace references)
+es2-msa
+es2-msa/
+es2-deploy
+es2-deploy/
+
+# Local helper scripts
+run_unit_tests.py
 
 # Jupyter
 .ipynb_checkpoints/
diff --git a/VECTORSTORE.md b/VECTORSTORE.md
deleted file mode 100644
index 7bcfd3b..0000000
--- a/VECTORSTORE.md
+++ /dev/null
@@ -1,74 +0,0 @@
-# LangChain VectorStore API 지원 현황
-
-이 문서는 LangChain의 VectorStore 인터페이스와 현재 envector에서 지원하는 기능들을 비교 분석한 결과입니다.
-
-## API 지원 현황 테이블
-
-| 메서드 | 설명 | 현재 상태 | 비고 |
-|--------|------|-----------|------|
-| **문서 추가/관리** |
-| `add_documents(documents)` | Document 객체로 문서 추가 | 🔧 구현 가능 | `add_texts` 래핑으로 구현 가능 |
-| `add_texts(texts, metadatas, ids)` | 텍스트로 직접 추가 | ✅ 구현됨 | 완전 지원 |
-| `add_documents(documents)` | 문서 추가 | ✅ 지원 | `add_texts` 위임, 임베딩/벡터 경로 지원 |
-| `upsert_documents(documents)` | 문서 추가/업데이트 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
-| `upsert_texts(texts, metadatas, ids)` | 텍스트 추가/업데이트 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
-| **문서 삭제** |
-| `delete(ids)` | ID로 문서 삭제 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
-| `delete_documents(documents)` | Document 객체로 삭제 | ❌ 구현 불가 | ES2 SDK 제한으로 불가능 |
-| **검색** |
-| `similarity_search(query, k, filter)` | 유사도 검색 | ✅ 구현됨 | 완전 지원 |
-| `similarity_search_with_score(query, k, filter)` | 점수와 함께 유사도 검색 | 🔧 구현 가능 | `_score`를 메타데이터로 제공 중 |
-| `similarity_search_by_vector(embedding, k, filter)` | 벡터로 직접 검색 | ✅ 구현됨 | 완전 지원 |
-| `similarity_search_with_score_by_vector(embedding, k, filter)` | 벡터로 점수와 함께 검색 | 🔧 구현 가능 | `_score`를 메타데이터로 제공 중 |
-| **팩토리 메서드** |
-| `from_texts(texts, embedding, metadatas)` | 텍스트로부터 생성 | ✅ 구현됨 | 완전 지원 |
-| `from_documents(documents, embedding)` | Document로부터 생성 | ✅ 구현됨 | 완전 지원 |
-| **기타** |
-| `as_retriever(**kwargs)` | VectorStoreRetriever로 변환 | ✅ 구현됨 | 완전 지원 |
-
-### 범례
-- ✅ **구현됨**: 현재 완전히 구현되어 사용 가능
-- 🔧 **구현 가능**: 현재 구현되지 않았지만 기술적으로 구현 가능
-- ❌ **구현 불가**: ES2 SDK 제한으로 인해 구현 불가능
-
-## 지원 현황 요약
-
-### ✅ 구현됨 (6개)
-- `add_texts` - 텍스트 추가
-- `similarity_search` - 유사도 검색
-- `similarity_search_by_vector` - 벡터 검색
-- `from_texts` - 팩토리 메서드
-- `from_documents` - 팩토리 메서드
-- `as_retriever` - 리트리버 변환
-
-### 🔧 구현 가능 (3개)
-- `add_documents` - Document 객체 추가 (래핑으로 구현 가능)
-- `similarity_search_with_score` - 점수와 함께 검색 (현재 `_score` 메타데이터로 제공)
-- `similarity_search_with_score_by_vector` - 벡터로 점수와 함께 검색 (현재 `_score` 메타데이터로 제공)
-
-### ❌ 구현 불가 (4개)
-- `add_documents` - Document 리스트 삽입 (지원)
-- `upsert_documents` - 문서 업서트 (ES2 SDK 제한)
-- `upsert_texts` - 텍스트 업서트 (ES2 SDK 제한)
-- `delete` - ID로 삭제 (ES2 SDK 제한)
-- `delete_documents` - Document 삭제 (ES2 SDK 제한)
-
-## 주요 제한사항
-
-1. **개별 문서 삭제/업데이트 불가**: envector는 개별 문서의 삭제나 업데이트를 지원하지 않습니다. 전체 인덱스를 삭제해야 합니다.
-2. **IDs 무시**: `add_documents`/`add_texts`에서 사용자 제공 ID는 무시됩니다. 반환값은 서버 영속 ID가 아닌 일시적 식별자입니다.
-
-2. **upsert 기능 없음**: 문서의 추가/업데이트를 한 번에 처리하는 upsert 기능이 없습니다.
-
-3. **점수 반환 방식**: `similarity_search_with_score` 메서드는 없지만, `similarity_search`에서 `_score`를 메타데이터로 제공합니다.
-
-## 사용 권장사항
-
-- **문서 추가**: `add_texts` 메서드 사용
-- **검색**: `similarity_search` 또는 `similarity_search_by_vector` 사용
-- **점수 확인**: 검색 결과의 `metadata['_score']`에서 점수 확인
-- **RAG 파이프라인**: `as_retriever()`를 사용하여 LangChain의 RAG 워크플로우에 통합
-
-## 호환성
-
-envector는 LangChain의 핵심 VectorStore 기능을 지원하여 기본적인 RAG(Retrieval-Augmented Generation) 워크플로우를 구현하는 데 충분합니다. 다만 개별 문서 관리가 필요한 경우에는 다른 VectorStore 구현체를 고려해야 합니다.
diff --git a/es2-msa b/es2-msa
deleted file mode 120000
index 44b78b8..0000000
--- a/es2-msa
+++ /dev/null
@@ -1 +0,0 @@
-/Users/inkme/git/es2-msa
\ No newline at end of file
diff --git a/run_unit_tests.py b/run_unit_tests.py
deleted file mode 100644
index 35665c7..0000000
--- a/run_unit_tests.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python3
-"""Run unit tests without requiring a running Envector (ES2) server.
-
-This script runs pytest while excluding tests marked as `integration`.
-It is safe to use in environments without the es2 SDK or server.
-"""
-
-import sys
-import subprocess
-
-
-def main() -> int:
-    cmd = [sys.executable, "-m", "pytest", "-q", "-m", "not integration"]
-    try:
-        return subprocess.call(cmd)
-    except FileNotFoundError:
-        print("pytest not found. Install with: python -m pip install pytest", file=sys.stderr)
-        return 1
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
-

From 78e6815f525a662465ef9a013a7906e4c00378ba Mon Sep 17 00:00:00 2001
From: inkme <inkme@example.com>
Date: Sun, 12 Oct 2025 13:57:07 +0000
Subject: [PATCH 03/13] ci: add PR checks workflow

---
 .github/workflows/pr.yml                      |  35 ++++++
 .pre-commit-config.yaml                       |  10 ++
 libs/envector/examples/basic_usage.py         |  12 +-
 libs/envector/examples/cipher_query.py        |  16 ++-
 libs/envector/examples/ingest_synthetic_1k.py |  22 +++-
 libs/envector/langchain_envector/__init__.py  |   9 +-
 libs/envector/langchain_envector/client.py    |   6 +-
 libs/envector/langchain_envector/config.py    |   1 -
 libs/envector/langchain_envector/retriever.py |   4 +-
 libs/envector/langchain_envector/types.py     |  24 +++-
 .../langchain_envector/vectorstore.py         |  39 ++++--
 scripts/export_hf_dataset.py                  |  17 ++-
 scripts/make_synthetic_rag_dataset.py         |   4 +-
 scripts/run_unit_tests.py                     |   2 -
 tests/__init__.py                             |   1 -
 tests/conftest.py                             |   3 +-
 tests/integration/test_es2_integration.py     |  73 +++++++++---
 tests/test_types.py                           |   2 -
 tests/test_vectorstore.py                     | 112 +++++++++++++-----
 19 files changed, 292 insertions(+), 100 deletions(-)
 create mode 100644 .github/workflows/pr.yml
 create mode 100644 .pre-commit-config.yaml

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
new file mode 100644
index 0000000..33e75a2
--- /dev/null
+++ b/.github/workflows/pr.yml
@@ -0,0 +1,35 @@
+name: PR Checks
+
+on:
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: pr-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  checks:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install -e .
+          python -m pip install pytest pre-commit
+
+      - name: Lint and format
+        run: pre-commit run --all-files --show-diff-on-failure
+
+      - name: Run unit tests
+        run: python -m pytest -q -m "not integration"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..00caf48
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.7.1
+    hooks:
+      - id: ruff
+  - repo: https://github.com/psf/black
+    rev: 24.10.0
+    hooks:
+      - id: black
+        language_version: python3.11
diff --git a/libs/envector/examples/basic_usage.py b/libs/envector/examples/basic_usage.py
index 796118b..7936ece 100644
--- a/libs/envector/examples/basic_usage.py
+++ b/libs/envector/examples/basic_usage.py
@@ -8,7 +8,12 @@
 
 from __future__ import annotations
 
-from libs.envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from libs.envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from libs.envector.vectorstore import Envector
 
 
@@ -16,7 +21,9 @@ def main():
     # Replace with your actual settings
     cfg = EnvectorConfig(
         connection=ConnectionConfig(address="localhost:50050"),
-        key=KeyConfig(key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"),
+        key=KeyConfig(
+            key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"
+        ),
         index=IndexSettings(index_name="demo", dim=384, query_encryption="plain"),
         create_if_missing=True,
     )
@@ -43,4 +50,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/libs/envector/examples/cipher_query.py b/libs/envector/examples/cipher_query.py
index d7defc3..64fe174 100644
--- a/libs/envector/examples/cipher_query.py
+++ b/libs/envector/examples/cipher_query.py
@@ -6,15 +6,24 @@
 
 from __future__ import annotations
 
-from libs.envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from libs.envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from libs.envector.vectorstore import Envector
 
 
 def main():
     cfg = EnvectorConfig(
         connection=ConnectionConfig(address="localhost:50050"),
-        key=KeyConfig(key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"),
-        index=IndexSettings(index_name="demo_cipher", dim=384, query_encryption="cipher"),
+        key=KeyConfig(
+            key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"
+        ),
+        index=IndexSettings(
+            index_name="demo_cipher", dim=384, query_encryption="cipher"
+        ),
         create_if_missing=True,
     )
 
@@ -38,4 +47,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/libs/envector/examples/ingest_synthetic_1k.py b/libs/envector/examples/ingest_synthetic_1k.py
index dbb0e31..8b055c8 100644
--- a/libs/envector/examples/ingest_synthetic_1k.py
+++ b/libs/envector/examples/ingest_synthetic_1k.py
@@ -19,7 +19,12 @@
 from pathlib import Path
 from typing import List
 
-from libs.envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from libs.envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from libs.envector.vectorstore import Envector
 
 
@@ -34,7 +39,12 @@ def main():
     ap.add_argument("--key-path", required=True)
     ap.add_argument("--key-id", required=True)
     ap.add_argument("--index-name", required=True)
-    ap.add_argument("--dim", type=int, required=False, help="If omitted and --use-embeddings, infer from model.")
+    ap.add_argument(
+        "--dim",
+        type=int,
+        required=False,
+        help="If omitted and --use-embeddings, infer from model.",
+    )
     ap.add_argument("--dataset", default="data/synthetic_rag_1k.jsonl")
     ap.add_argument("--use-embeddings", action="store_true")
     ap.add_argument("--model", default="sentence-transformers/all-MiniLM-L6-v2")
@@ -52,7 +62,9 @@ def main():
 
     cfg = EnvectorConfig(
         connection=ConnectionConfig(address=args.address),
-        key=KeyConfig(key_path=args.key_path, key_id=args.key_id, preset="ip", eval_mode="rmp"),
+        key=KeyConfig(
+            key_path=args.key_path, key_id=args.key_id, preset="ip", eval_mode="rmp"
+        ),
         index=IndexSettings(
             index_name=args.index_name,
             dim=(args.dim if args.dim is not None else inferred_dim or 0),
@@ -76,7 +88,9 @@ def main():
         if embeddings is None:
             # Without embeddings, require manual vectors; here we simply skip.
             # Users should provide --use-embeddings or adapt to their vector source.
-            raise ValueError("--use-embeddings is required unless you provide vectors explicitly.")
+            raise ValueError(
+                "--use-embeddings is required unless you provide vectors explicitly."
+            )
         store.add_texts(t_batch, metadatas=m_batch)
 
     print(f"Inserted {len(texts)} documents into index '{args.index_name}'")
diff --git a/libs/envector/langchain_envector/__init__.py b/libs/envector/langchain_envector/__init__.py
index a92cf2b..cf7a9b6 100644
--- a/libs/envector/langchain_envector/__init__.py
+++ b/libs/envector/langchain_envector/__init__.py
@@ -7,5 +7,10 @@
 from .vectorstore import Envector
 from .config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
 
-__all__ = ["Envector", "ConnectionConfig", "EnvectorConfig", "IndexSettings", "KeyConfig"]
-
+__all__ = [
+    "Envector",
+    "ConnectionConfig",
+    "EnvectorConfig",
+    "IndexSettings",
+    "KeyConfig",
+]
diff --git a/libs/envector/langchain_envector/client.py b/libs/envector/langchain_envector/client.py
index 6329c27..c3ac2f9 100644
--- a/libs/envector/langchain_envector/client.py
+++ b/libs/envector/langchain_envector/client.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-from typing import Optional
-
 from .config import EnvectorConfig
 
 
@@ -34,7 +32,9 @@ def init(self):
         else:
             if not (c.host and c.port):
                 raise ValueError("Either address or host+port must be provided.")
-            es2_client.init_connect(host=c.host, port=c.port, access_token=c.access_token)
+            es2_client.init_connect(
+                host=c.host, port=c.port, access_token=c.access_token
+            )
 
         # Key path baseline for Index
         from es2.index import Index as _Index
diff --git a/libs/envector/langchain_envector/config.py b/libs/envector/langchain_envector/config.py
index b6be7c0..62e5291 100644
--- a/libs/envector/langchain_envector/config.py
+++ b/libs/envector/langchain_envector/config.py
@@ -39,4 +39,3 @@ class EnvectorConfig:
     key: KeyConfig
     index: IndexSettings
     create_if_missing: bool = True
-
diff --git a/libs/envector/langchain_envector/retriever.py b/libs/envector/langchain_envector/retriever.py
index ee578bc..255471c 100644
--- a/libs/envector/langchain_envector/retriever.py
+++ b/libs/envector/langchain_envector/retriever.py
@@ -12,7 +12,9 @@
 
 
 class EnvectorRetriever:
-    def __init__(self, store: Envector, *, search_kwargs: Optional[Dict[str, Any]] = None) -> None:
+    def __init__(
+        self, store: Envector, *, search_kwargs: Optional[Dict[str, Any]] = None
+    ) -> None:
         self.store = store
         self.search_kwargs = search_kwargs or {}
 
diff --git a/libs/envector/langchain_envector/types.py b/libs/envector/langchain_envector/types.py
index e4a82ae..5ad4591 100644
--- a/libs/envector/langchain_envector/types.py
+++ b/libs/envector/langchain_envector/types.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Protocol, Tuple, Union, overload
+from typing import Any, Callable, Dict, List, Optional, Protocol
 
 
 class Embeddings(Protocol):
@@ -10,10 +10,14 @@ class Embeddings(Protocol):
     LangChain-compatible embeddings typically implement these two methods.
     """
 
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:  # pragma: no cover - interface only
+    def embed_documents(
+        self, texts: List[str]
+    ) -> List[List[float]]:  # pragma: no cover - interface only
         ...
 
-    def embed_query(self, text: str) -> List[float]:  # pragma: no cover - interface only
+    def embed_query(
+        self, text: str
+    ) -> List[float]:  # pragma: no cover - interface only
         ...
 
 
@@ -94,8 +98,13 @@ def unpack_metadata(raw: Any) -> Dict[str, Any]:
 
 # --- Embeddings adaptation helpers -----------------------------------------------------
 
+
 class _CallableEmbeddings:
-    def __init__(self, docs_fn: Callable[[List[str]], List[List[float]]], query_fn: Callable[[str], List[float]]):
+    def __init__(
+        self,
+        docs_fn: Callable[[List[str]], List[List[float]]],
+        query_fn: Callable[[str], List[float]],
+    ):
         self._docs_fn = docs_fn
         self._query_fn = query_fn
 
@@ -132,7 +141,12 @@ def query_fn(text: str) -> List[float]:
         return _CallableEmbeddings(docs_fn, query_fn)
 
     # Case 3: Tuple of callables
-    if isinstance(emb, tuple) and len(emb) == 2 and callable(emb[0]) and callable(emb[1]):
+    if (
+        isinstance(emb, tuple)
+        and len(emb) == 2
+        and callable(emb[0])
+        and callable(emb[1])
+    ):
         docs_fn, query_fn = emb  # type: ignore[assignment]
         return _CallableEmbeddings(docs_fn, query_fn)
 
diff --git a/libs/envector/langchain_envector/vectorstore.py b/libs/envector/langchain_envector/vectorstore.py
index 0011deb..0350af9 100644
--- a/libs/envector/langchain_envector/vectorstore.py
+++ b/libs/envector/langchain_envector/vectorstore.py
@@ -1,9 +1,6 @@
 from __future__ import annotations
 
-import json
-from typing import Any, Dict, Iterable, List, Optional, Sequence
-from uuid import uuid4
-
+from typing import Any, Dict, List, Optional
 from .config import EnvectorConfig
 from .client import EnvectorClient
 from .types import Embeddings, as_embeddings, pack_metadata, unpack_metadata
@@ -21,12 +18,15 @@ def _try_import_langchain():
     except Exception:  # pragma: no cover - optional dependency
         # Minimal shim if LangChain is not installed
         class Document:  # type: ignore
-            def __init__(self, page_content: str, metadata: Optional[Dict[str, Any]] = None):
+            def __init__(
+                self, page_content: str, metadata: Optional[Dict[str, Any]] = None
+            ):
                 self.page_content = page_content
                 self.metadata = metadata or {}
 
     try:
         from langchain_core.vectorstores import VectorStore as _VectorStore  # type: ignore
+
         VectorStoreBase = _VectorStore
     except Exception:  # pragma: no cover - optional dependency
         pass
@@ -119,9 +119,15 @@ def similarity_search(
 
         top_k = fetch_k or self.config.index.fetch_k or k
 
-        results = self.client.index.search(query=embedding, top_k=top_k, output_fields=self.config.index.output_fields)
+        results = self.client.index.search(
+            query=embedding, top_k=top_k, output_fields=self.config.index.output_fields
+        )
         # ES2 Index.search returns a list for each query; we passed single query
-        result = results[0] if isinstance(results, list) and results and isinstance(results[0], list) else results
+        result = (
+            results[0]
+            if isinstance(results, list) and results and isinstance(results[0], list)
+            else results
+        )
 
         docs = []
         # Iterate from top-1 to top-k
@@ -129,7 +135,7 @@ def similarity_search(
             # item = {"id": ..., "score": float, "metadata": [str] or {...}}
             score = float(item.get("score", 0.0))
             md_obj_raw = item.get("metadata")
-        
+
             # Metadata encryption/decryption is handled by the SDK.
             # Envector currently supports a single associated data field (string).
             # Convention: if the string is JSON like {"text": str, "metadata": {...}},
@@ -148,7 +154,10 @@ def similarity_search(
             if score_threshold is not None and score < score_threshold:
                 continue
 
-            doc = Document(page_content=text, metadata={**metadata, "_score": score, "_id": item.get("id")})
+            doc = Document(
+                page_content=text,
+                metadata={**metadata, "_score": score, "_id": item.get("id")},
+            )
             docs.append(doc)
 
         # Trim to k after filtering
@@ -198,7 +207,9 @@ def add_documents(
         """
         texts = [getattr(d, "page_content", "") for d in documents]
         metadatas = [getattr(d, "metadata", {}) for d in documents]
-        return self.add_texts(texts=texts, metadatas=metadatas, ids=ids, vectors=vectors, **kwargs)
+        return self.add_texts(
+            texts=texts, metadatas=metadatas, ids=ids, vectors=vectors, **kwargs
+        )
 
     @classmethod
     def from_texts(
@@ -233,7 +244,9 @@ def from_documents(
     ) -> "Envector":  # type: ignore[override]
         texts = [d.page_content for d in documents]
         metadatas = [getattr(d, "metadata", {}) for d in documents]
-        return cls.from_texts(texts=texts, metadatas=metadatas, embeddings=embeddings, **kwargs)
+        return cls.from_texts(
+            texts=texts, metadatas=metadatas, embeddings=embeddings, **kwargs
+        )
 
     # Optional: if LangChain is installed, this will be used; otherwise, users may call similarity_search directly.
     def as_retriever(self, **kwargs: Any):  # pragma: no cover - wrapper
@@ -244,7 +257,9 @@ def as_retriever(self, **kwargs: Any):  # pragma: no cover - wrapper
         except Exception:
             # Minimal shim if VectorStoreRetriever is unavailable
             class _Retriever:
-                def __init__(self, vs: Envector, search_kwargs: Optional[Dict[str, Any]] = None):
+                def __init__(
+                    self, vs: Envector, search_kwargs: Optional[Dict[str, Any]] = None
+                ):
                     self.vs = vs
                     self.search_kwargs = search_kwargs or {}
 
diff --git a/scripts/export_hf_dataset.py b/scripts/export_hf_dataset.py
index d49f299..88de37e 100644
--- a/scripts/export_hf_dataset.py
+++ b/scripts/export_hf_dataset.py
@@ -17,16 +17,22 @@
 import argparse
 import json
 from pathlib import Path
-from typing import List
 
 
 def main():
     ap = argparse.ArgumentParser()
     ap.add_argument("--name", required=True, help="HF dataset name, e.g., ag_news")
-    ap.add_argument("--subset", default=None, help="Optional subset/config of the dataset")
+    ap.add_argument(
+        "--subset", default=None, help="Optional subset/config of the dataset"
+    )
     ap.add_argument("--split", default="train")
     ap.add_argument("--text-column", required=True)
-    ap.add_argument("--meta-columns", nargs="*", default=[], help="Optional metadata columns to carry over")
+    ap.add_argument(
+        "--meta-columns",
+        nargs="*",
+        default=[],
+        help="Optional metadata columns to carry over",
+    )
     ap.add_argument("--size", type=int, default=1000)
     ap.add_argument("--seed", type=int, default=42)
     ap.add_argument("--out", default="data/hf_export.jsonl")
@@ -47,7 +53,9 @@ def main():
     with out_path.open("w", encoding="utf-8") as f:
         for row in ds:
             text = row[args.text_column]
-            meta = {k: row.get(k) for k in args.meta_columns} if args.meta_columns else {}
+            meta = (
+                {k: row.get(k) for k in args.meta_columns} if args.meta_columns else {}
+            )
             rec = {"text": text, "metadata": meta}
             f.write(json.dumps(rec, ensure_ascii=False) + "\n")
 
@@ -56,4 +64,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/scripts/make_synthetic_rag_dataset.py b/scripts/make_synthetic_rag_dataset.py
index 8badf1c..b9f4b61 100644
--- a/scripts/make_synthetic_rag_dataset.py
+++ b/scripts/make_synthetic_rag_dataset.py
@@ -11,7 +11,6 @@
 
 import argparse
 import json
-import os
 import random
 from pathlib import Path
 
@@ -59,7 +58,7 @@ def make_sentence(topic: str) -> str:
 
 def make_paragraph(topic: str, min_sent: int = 3, max_sent: int = 7) -> str:
     n = random.randint(min_sent, max_sent)
-    return " " .join(make_sentence(topic) for _ in range(n))
+    return " ".join(make_sentence(topic) for _ in range(n))
 
 
 def main():
@@ -86,4 +85,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/scripts/run_unit_tests.py b/scripts/run_unit_tests.py
index ecd1cdc..3370e56 100644
--- a/scripts/run_unit_tests.py
+++ b/scripts/run_unit_tests.py
@@ -2,7 +2,6 @@
 
 import importlib
 import inspect
-import sys
 import traceback
 
 
@@ -43,4 +42,3 @@ def main() -> int:
 
 if __name__ == "__main__":
     raise SystemExit(main())
-
diff --git a/tests/__init__.py b/tests/__init__.py
index c10059a..070d470 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -13,4 +13,3 @@
     pkg_path = str(_PKG_DIR)
     if pkg_path not in sys.path:
         sys.path.insert(0, pkg_path)
-
diff --git a/tests/conftest.py b/tests/conftest.py
index 47172b6..fe7954e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,8 +24,7 @@ class FakeIndex:
 
     def insert(self, data: List[List[float]], metadata: List[str]):
         self.inserted.append({"data": data, "metadata": metadata})
-        batch_idx = len(self.inserted) - 1
-        return [len(self.inserted)+i+1 for i in range(len(metadata))]
+        return [len(self.inserted) + i + 1 for i in range(len(metadata))]
 
     def search(self, query: List[float], top_k: int, output_fields: List[str]):
         if self.search_payload is not None:
diff --git a/tests/integration/test_es2_integration.py b/tests/integration/test_es2_integration.py
index c43661c..a2bf967 100644
--- a/tests/integration/test_es2_integration.py
+++ b/tests/integration/test_es2_integration.py
@@ -5,7 +5,12 @@
 import time
 import pytest
 
-from langchain_envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from langchain_envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from langchain_envector.vectorstore import Envector
 
 
@@ -33,13 +38,17 @@ def test_e2e_vectorstore_plain_and_cipher():
     key_path = _require_env("ES2_KEY_PATH")
     key_id = _require_env("ES2_KEY_ID")
     use_emb = os.environ.get("ES2_USE_EMBEDDINGS") in {"1", "true", "TRUE", "yes"}
-    model_name = os.environ.get("ES2_EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+    model_name = os.environ.get(
+        "ES2_EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
+    )
     use_hf = os.environ.get("ES2_USE_HF_DATASET") in {"1", "true", "TRUE", "yes"}
     hf_name = os.environ.get("ES2_HF_NAME", "ag_news")
     hf_subset = os.environ.get("ES2_HF_SUBSET")
     hf_split = os.environ.get("ES2_HF_SPLIT", "train")
     hf_text_col = os.environ.get("ES2_HF_TEXT_COL", "text")
-    hf_meta_cols = [c for c in os.environ.get("ES2_HF_META_COLS", "label").split(",") if c]
+    hf_meta_cols = [
+        c for c in os.environ.get("ES2_HF_META_COLS", "label").split(",") if c
+    ]
     hf_size = int(os.environ.get("ES2_HF_SIZE", "200"))
     hf_seed = int(os.environ.get("ES2_HF_SEED", "42"))
 
@@ -69,9 +78,12 @@ def test_e2e_vectorstore_plain_and_cipher():
     if dim < 16 or dim > 4096:
         pytest.skip("Envector supports dimensions in [16, 4096]")
 
-    base_index_name = os.environ.get("ES2_INDEX_NAME", f"inttest_{secrets.token_hex(4)}")
+    base_index_name = os.environ.get(
+        "ES2_INDEX_NAME", f"inttest_{secrets.token_hex(4)}"
+    )
 
     import es2
+
     es2.init_connect(address=address)
     es2.reset()
 
@@ -79,7 +91,9 @@ def test_e2e_vectorstore_plain_and_cipher():
     cfg_plain = EnvectorConfig(
         connection=ConnectionConfig(address=address),
         key=KeyConfig(key_path=key_path, key_id=key_id, preset="ip", eval_mode="rmp"),
-        index=IndexSettings(index_name=f"{base_index_name}_plain", dim=dim, query_encryption="plain"),
+        index=IndexSettings(
+            index_name=f"{base_index_name}_plain", dim=dim, query_encryption="plain"
+        ),
         create_if_missing=True,
     )
     store_plain = Envector(config=cfg_plain, embeddings=(emb if use_emb else None))
@@ -93,14 +107,14 @@ def test_e2e_vectorstore_plain_and_cipher():
         if hf_size and hf_size < len(ds):
             ds = ds.shuffle(seed=hf_seed).select(range(hf_size))
         texts = [row[hf_text_col] for row in ds]
-        metas = [
-            {k: row.get(k) for k in hf_meta_cols if k in row}
-            for row in ds
-        ]
+        metas = [{k: row.get(k) for k in hf_meta_cols if k in row} for row in ds]
         print(texts[0])
         print(metas[0])
     else:
-        texts = ["machine learning accelerates research", "cooking recipes are delicious"]
+        texts = [
+            "machine learning accelerates research",
+            "cooking recipes are delicious",
+        ]
         metas = [{"label": "A"}, {"label": "B"}]
 
     if use_emb:
@@ -120,21 +134,34 @@ def test_e2e_vectorstore_plain_and_cipher():
         docs = store_plain.similarity_search(q1, k=3)
         print("[plain] top-3 results for:", q1)
         for d in docs:
-            print(" - score=", d.metadata.get("_score"), "text=", (d.page_content[:80] + ("..." if len(d.page_content) > 80 else "")))
+            print(
+                " - score=",
+                d.metadata.get("_score"),
+                "text=",
+                (d.page_content[:80] + ("..." if len(d.page_content) > 80 else "")),
+            )
         assert len(docs) >= 1
         assert all("_id" in d.metadata for d in docs)
         # optional filter check if 'label' is part of meta
         if not use_hf:
-            docs_f = store_plain.similarity_search("cooking", k=2, filter={"label": "B"})
+            docs_f = store_plain.similarity_search(
+                "cooking", k=2, filter={"label": "B"}
+            )
             print("[plain] filtered results (label=B):", [d.metadata for d in docs_f])
-            assert len(docs_f) >= 1 and all(d.metadata.get("label") == "B" for d in docs_f)
+            assert len(docs_f) >= 1 and all(
+                d.metadata.get("label") == "B" for d in docs_f
+            )
     else:
         # Using explicit embeddings
         docs = store_plain.similarity_search("q", k=2, embedding=e1)
-        print("[plain] results (explicit embedding e1):", [d.page_content for d in docs])
+        print(
+            "[plain] results (explicit embedding e1):", [d.page_content for d in docs]
+        )
         assert any(d.page_content == texts[0] for d in docs)
         assert all("_id" in d.metadata for d in docs)
-        docs_f = store_plain.similarity_search("q", k=2, embedding=e2, filter={"label": "B"})
+        docs_f = store_plain.similarity_search(
+            "q", k=2, embedding=e2, filter={"label": "B"}
+        )
         print("[plain] filtered (e2, label=B):", [d.page_content for d in docs_f])
         assert len(docs_f) >= 1
         assert docs_f[0].page_content == texts[1]
@@ -143,7 +170,9 @@ def test_e2e_vectorstore_plain_and_cipher():
     cfg_cc = EnvectorConfig(
         connection=ConnectionConfig(address=address),
         key=KeyConfig(key_path=key_path, key_id=key_id, preset="ip", eval_mode="rmp"),
-        index=IndexSettings(index_name=f"{base_index_name}_cipher", dim=dim, query_encryption="cipher"),
+        index=IndexSettings(
+            index_name=f"{base_index_name}_cipher", dim=dim, query_encryption="cipher"
+        ),
         create_if_missing=True,
     )
     store_cc = Envector(config=cfg_cc, embeddings=(emb if use_emb else None))
@@ -158,12 +187,20 @@ def test_e2e_vectorstore_plain_and_cipher():
         docs_cc = store_cc.similarity_search(q2, k=3)
         print("[cipher] top-3 results for:", q2)
         for d in docs_cc:
-            print(" - score=", d.metadata.get("_score"), "text=", (d.page_content[:80] + ("..." if len(d.page_content) > 80 else "")))
+            print(
+                " - score=",
+                d.metadata.get("_score"),
+                "text=",
+                (d.page_content[:80] + ("..." if len(d.page_content) > 80 else "")),
+            )
         assert len(docs_cc) >= 1
         assert all("_id" in d.metadata for d in docs_cc)
     else:
         docs_cc = store_cc.similarity_search("q", k=2, embedding=e2)
-        print("[cipher] results (explicit embedding e2):", [d.page_content for d in docs_cc])
+        print(
+            "[cipher] results (explicit embedding e2):",
+            [d.page_content for d in docs_cc],
+        )
         assert any(d.page_content == texts[1] for d in docs_cc)
         assert all("_id" in d.metadata for d in docs_cc)
 
diff --git a/tests/test_types.py b/tests/test_types.py
index c85c371..65cd546 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import json
-
 from langchain_envector.types import pack_metadata, unpack_metadata
 
 
diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py
index 50c4920..b572ee5 100644
--- a/tests/test_vectorstore.py
+++ b/tests/test_vectorstore.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
-import re
-
-from langchain_envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+from langchain_envector.config import (
+    ConnectionConfig,
+    EnvectorConfig,
+    IndexSettings,
+    KeyConfig,
+)
 from langchain_envector.vectorstore import Envector, Document as LC_Document
 
 from .conftest import FakeClient, FakeEmbeddings, FakeIndex
@@ -20,7 +23,9 @@ def test_add_texts_ignores_ids_and_returns_item_ids():
     client = FakeClient()
     store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
 
-    ret_ids = store.add_texts(["t1", "t2"], metadatas=[{"m": 1}, {"m": 2}], ids=["a", "b"])  # ids ignored
+    ret_ids = store.add_texts(
+        ["t1", "t2"], metadatas=[{"m": 1}, {"m": 2}], ids=["a", "b"]
+    )  # ids ignored
 
     # Returned IDs
     assert len(ret_ids) == 2
@@ -30,20 +35,32 @@ def test_add_texts_ignores_ids_and_returns_item_ids():
     assert len(client.index.inserted) == 1
     packed = client.index.inserted[0]["metadata"]
     assert len(packed) == 2
-    assert "\"id\"" not in packed[0]
+    assert '"id"' not in packed[0]
 
 
 def test_similarity_search_with_filter_and_threshold():
     index = FakeIndex()
     # Two items, different scores and tags
-    index.search_payload = [[
-        {"id": "pos-0", "score": 0.95, "metadata": "{\"text\": \"A\", \"metadata\": {\"tag\": \"keep\"}}"},
-        {"id": "pos-1", "score": 0.40, "metadata": "{\"text\": \"B\", \"metadata\": {\"tag\": \"drop\"}}"},
-    ]]
+    index.search_payload = [
+        [
+            {
+                "id": "pos-0",
+                "score": 0.95,
+                "metadata": '{"text": "A", "metadata": {"tag": "keep"}}',
+            },
+            {
+                "id": "pos-1",
+                "score": 0.40,
+                "metadata": '{"text": "B", "metadata": {"tag": "drop"}}',
+            },
+        ]
+    ]
     client = FakeClient(index)
     store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
 
-    docs = store.similarity_search("q", k=5, filter={"tag": "keep"}, score_threshold=0.5)
+    docs = store.similarity_search(
+        "q", k=5, filter={"tag": "keep"}, score_threshold=0.5
+    )
     assert len(docs) == 1
     assert docs[0].page_content == "A"
     assert docs[0].metadata["_score"] >= 0.5
@@ -52,9 +69,15 @@ def test_similarity_search_with_filter_and_threshold():
 def test_similarity_search_handles_string_metadata():
     index = FakeIndex()
     # metadata returned as a single JSON string instead of list
-    index.search_payload = [[
-        {"id": "pos-0", "score": 0.8, "metadata": "{\"text\": \"S\", \"metadata\": {\"t\": 1}}"},
-    ]]
+    index.search_payload = [
+        [
+            {
+                "id": "pos-0",
+                "score": 0.8,
+                "metadata": '{"text": "S", "metadata": {"t": 1}}',
+            },
+        ]
+    ]
     client = FakeClient(index)
     store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
 
@@ -67,9 +90,15 @@ def test_similarity_search_handles_string_metadata():
 def test_similarity_search_uses_raw_text_when_not_json():
     index = FakeIndex()
     # metadata is a plain string (not JSON); should be treated as page_content
-    index.search_payload = [[
-        {"id": "pos-raw", "score": 0.6, "metadata": "Plain text content without JSON"},
-    ]]
+    index.search_payload = [
+        [
+            {
+                "id": "pos-raw",
+                "score": 0.6,
+                "metadata": "Plain text content without JSON",
+            },
+        ]
+    ]
     client = FakeClient(index)
     store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
 
@@ -77,15 +106,19 @@ def test_similarity_search_uses_raw_text_when_not_json():
     assert len(docs) == 1
     assert docs[0].page_content == "Plain text content without JSON"
     # user metadata should be empty dict when not provided
-    assert all(k in docs[0].metadata for k in ["_score", "_id"])  # only system fields present
+    assert all(
+        k in docs[0].metadata for k in ["_score", "_id"]
+    )  # only system fields present
 
 
 def test_similarity_search_handles_python_literal_metadata():
     index = FakeIndex()
     literal = str({"text": "Literal", "metadata": {"tag": "py"}})
-    index.search_payload = [[
-        {"id": "pos-lit", "score": 0.7, "metadata": literal},
-    ]]
+    index.search_payload = [
+        [
+            {"id": "pos-lit", "score": 0.7, "metadata": literal},
+        ]
+    ]
     client = FakeClient(index)
     store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
 
@@ -94,21 +127,32 @@ def test_similarity_search_handles_python_literal_metadata():
     assert docs[0].page_content == "Literal"
     assert docs[0].metadata.get("tag") == "py"
 
-
     # dict-type metadata is not supported currently; only text-based
 
 
 def test_similarity_search_by_vector_with_filter_and_threshold():
     index = FakeIndex()
-    index.search_payload = [[
-        {"id": "v-0", "score": 0.88, "metadata": "{\"text\": \"Keep\", \"metadata\": {\"k\": 1}}"},
-        {"id": "v-1", "score": 0.30, "metadata": "{\"text\": \"Drop\", \"metadata\": {\"k\": 2}}"},
-    ]]
+    index.search_payload = [
+        [
+            {
+                "id": "v-0",
+                "score": 0.88,
+                "metadata": '{"text": "Keep", "metadata": {"k": 1}}',
+            },
+            {
+                "id": "v-1",
+                "score": 0.30,
+                "metadata": '{"text": "Drop", "metadata": {"k": 2}}',
+            },
+        ]
+    ]
     client = FakeClient(index)
     store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
 
     # Explicit vector search (bypasses embed_query), with filter/threshold
-    docs = store.similarity_search_by_vector([0.0, 0.0, 0.0, 0.0], k=5, filter={"k": 1}, score_threshold=0.5)
+    docs = store.similarity_search_by_vector(
+        [0.0, 0.0, 0.0, 0.0], k=5, filter={"k": 1}, score_threshold=0.5
+    )
     assert len(docs) == 1
     assert docs[0].page_content == "Keep"
     assert docs[0].metadata["_score"] >= 0.5
@@ -136,13 +180,15 @@ def test_from_documents_paths_through_to_texts():
         LC_Document(page_content="X", metadata={"a": 1}),
         LC_Document(page_content="Y", metadata={"a": 2}),
     ]
-    store = Envector.from_documents(docs, embeddings=FakeEmbeddings(dim=4), config=_cfg(), client=client)
+    store = Envector.from_documents(
+        docs, embeddings=FakeEmbeddings(dim=4), config=_cfg(), client=client
+    )
     assert isinstance(store, Envector)
     assert len(client.index.inserted) == 1
     packed = client.index.inserted[0]["metadata"]
     # Texts preserved
-    assert any("\"text\": \"X\"" in m for m in packed)
-    assert any("\"text\": \"Y\"" in m for m in packed)
+    assert any('"text": "X"' in m for m in packed)
+    assert any('"text": "Y"' in m for m in packed)
 
 
 def test_add_documents_with_embeddings():
@@ -157,8 +203,8 @@ def test_add_documents_with_embeddings():
     assert len(ret) == 2
     assert len(client.index.inserted) == 1
     packed = client.index.inserted[0]["metadata"]
-    assert any("\"text\": \"C1\"" in m for m in packed)
-    assert any("\"text\": \"C2\"" in m for m in packed)
+    assert any('"text": "C1"' in m for m in packed)
+    assert any('"text": "C2"' in m for m in packed)
 
 
 def test_add_documents_requires_vectors_when_no_embeddings():
@@ -167,7 +213,9 @@ def test_add_documents_requires_vectors_when_no_embeddings():
     docs = [LC_Document(page_content="C", metadata={})]
     try:
         store.add_documents(docs)
-        assert False, "Expected ValueError when embeddings is None and no vectors provided"
+        assert (
+            False
+        ), "Expected ValueError when embeddings is None and no vectors provided"
     except ValueError as e:
         assert "embeddings is None and vectors not provided" in str(e)
 

From a1d89c632bf5cdc4f33e8bba09e618eedf8f4592 Mon Sep 17 00:00:00 2001
From: Jungjoo Seo <115966721+inkme9@users.noreply.github.com>
Date: Sun, 12 Oct 2025 23:08:16 +0900
Subject: [PATCH 04/13] Update runs-on

---
 .github/workflows/pr.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index 33e75a2..7d4ab9b 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -11,7 +11,7 @@ concurrency:
 
 jobs:
   checks:
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, no-gpu]
 
     steps:
       - name: Checkout repository

From b7171b0e8f1d9f1b9ddafd13521f0dbc5d467847 Mon Sep 17 00:00:00 2001
From: inkme <inkme@example.com>
Date: Sun, 12 Oct 2025 23:53:49 +0000
Subject: [PATCH 05/13] ES2-979: add score-returning similarity search APIs

---
 .../langchain_envector/vectorstore.py         | 106 ++++++++++++++----
 tests/test_vectorstore.py                     |  52 +++++++++
 2 files changed, 137 insertions(+), 21 deletions(-)

diff --git a/libs/envector/langchain_envector/vectorstore.py b/libs/envector/langchain_envector/vectorstore.py
index 0350af9..eef1cb5 100644
--- a/libs/envector/langchain_envector/vectorstore.py
+++ b/libs/envector/langchain_envector/vectorstore.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 from .config import EnvectorConfig
 from .client import EnvectorClient
 from .types import Embeddings, as_embeddings, pack_metadata, unpack_metadata
@@ -96,27 +96,16 @@ def add_texts(
         # but they are NOT persisted/addressable.
         return result_ids
 
-    def similarity_search(
+    def _similarity_search_with_scores(
         self,
-        query: str,
-        k: int = 4,
         *,
+        embedding: List[float],
+        k: int,
         filter: Optional[Dict[str, Any]] = None,
         score_threshold: Optional[float] = None,
         fetch_k: Optional[int] = None,
         **kwargs: Any,
-    ) -> List[Document]:
-        """Search similar items for a text query.
-
-        - Embeds query if embeddings are provided; else expect `embedding` kwarg.
-        - Applies optional client-side filter and score threshold.
-        """
-        embedding: Optional[List[float]] = kwargs.get("embedding")
-        if embedding is None:
-            if self._embeddings is None:
-                raise ValueError("embeddings is None and no `embedding` provided")
-            embedding = self._embeddings.embed_query(query)
-
+    ) -> List[Tuple[Document, float]]:
         top_k = fetch_k or self.config.index.fetch_k or k
 
         results = self.client.index.search(
@@ -129,7 +118,7 @@ def similarity_search(
             else results
         )
 
-        docs = []
+        docs_with_scores: List[Tuple[Document, float]] = []
         # Iterate from top-1 to top-k
         for item in result:
             # item = {"id": ..., "score": float, "metadata": [str] or {...}}
@@ -158,10 +147,66 @@ def similarity_search(
                 page_content=text,
                 metadata={**metadata, "_score": score, "_id": item.get("id")},
             )
-            docs.append(doc)
+            docs_with_scores.append((doc, score))
 
         # Trim to k after filtering
-        return docs[:k]
+        return docs_with_scores[:k]
+
+    def similarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        *,
+        filter: Optional[Dict[str, Any]] = None,
+        score_threshold: Optional[float] = None,
+        fetch_k: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Search similar items for a text query.
+
+        - Embeds query if embeddings are provided; else expect `embedding` kwarg.
+        - Applies optional client-side filter and score threshold.
+        """
+        embedding: Optional[List[float]] = kwargs.pop("embedding", None)
+        if embedding is None:
+            if self._embeddings is None:
+                raise ValueError("embeddings is None and no `embedding` provided")
+            embedding = self._embeddings.embed_query(query)
+
+        docs_with_scores = self._similarity_search_with_scores(
+            embedding=embedding,
+            k=k,
+            filter=filter,
+            score_threshold=score_threshold,
+            fetch_k=fetch_k,
+            **kwargs,
+        )
+        return [doc for doc, _ in docs_with_scores]
+
+    def similarity_search_with_score(
+        self,
+        query: str,
+        k: int = 4,
+        *,
+        filter: Optional[Dict[str, Any]] = None,
+        score_threshold: Optional[float] = None,
+        fetch_k: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        embedding: Optional[List[float]] = kwargs.pop("embedding", None)
+        if embedding is None:
+            if self._embeddings is None:
+                raise ValueError("embeddings is None and no `embedding` provided")
+            embedding = self._embeddings.embed_query(query)
+
+        return self._similarity_search_with_scores(
+            embedding=embedding,
+            k=k,
+            filter=filter,
+            score_threshold=score_threshold,
+            fetch_k=fetch_k,
+            **kwargs,
+        )
 
     # Vector-based variant required by some VectorStore interfaces
     def similarity_search_by_vector(
@@ -174,13 +219,32 @@ def similarity_search_by_vector(
         fetch_k: Optional[int] = None,
         **kwargs: Any,
     ) -> List[Document]:
-        return self.similarity_search(
-            query="",  # unused
+        docs_with_scores = self._similarity_search_with_scores(
+            embedding=embedding,
             k=k,
             filter=filter,
             score_threshold=score_threshold,
             fetch_k=fetch_k,
+            **kwargs,
+        )
+        return [doc for doc, _ in docs_with_scores]
+
+    def similarity_search_with_score_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        *,
+        filter: Optional[Dict[str, Any]] = None,
+        score_threshold: Optional[float] = None,
+        fetch_k: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        return self._similarity_search_with_scores(
             embedding=embedding,
+            k=k,
+            filter=filter,
+            score_threshold=score_threshold,
+            fetch_k=fetch_k,
             **kwargs,
         )
 
diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py
index b572ee5..8178c52 100644
--- a/tests/test_vectorstore.py
+++ b/tests/test_vectorstore.py
@@ -158,6 +158,58 @@ def test_similarity_search_by_vector_with_filter_and_threshold():
     assert docs[0].metadata["_score"] >= 0.5
 
 
+def test_similarity_search_with_score_returns_tuples():
+    index = FakeIndex()
+    index.search_payload = [
+        [
+            {
+                "id": "s-0",
+                "score": 0.77,
+                "metadata": '{"text": "Doc0", "metadata": {"tag": "x"}}',
+            },
+            {
+                "id": "s-1",
+                "score": 0.25,
+                "metadata": '{"text": "Doc1", "metadata": {"tag": "y"}}',
+            },
+        ]
+    ]
+    client = FakeClient(index)
+    store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
+
+    results = store.similarity_search_with_score("query", k=2)
+    assert len(results) == 2
+    first_doc, first_score = results[0]
+    assert isinstance(first_doc, LC_Document)
+    assert first_doc.page_content == "Doc0"
+    assert first_doc.metadata["_score"] == first_score
+    assert first_doc.metadata["_id"] == "s-0"
+
+
+def test_similarity_search_with_score_by_vector_returns_tuples():
+    index = FakeIndex()
+    index.search_payload = [
+        [
+            {
+                "id": "sv-0",
+                "score": 0.66,
+                "metadata": '{"text": "VectorDoc", "metadata": {"tag": "keep"}}',
+            }
+        ]
+    ]
+    client = FakeClient(index)
+    store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
+
+    results = store.similarity_search_with_score_by_vector(
+        [0.0, 0.0, 0.0, 0.0], k=1, filter={"tag": "keep"}, score_threshold=0.5
+    )
+    assert len(results) == 1
+    doc, score = results[0]
+    assert doc.page_content == "VectorDoc"
+    assert score == doc.metadata["_score"]
+    assert doc.metadata["_id"] == "sv-0"
+
+
 def test_from_texts_inserts_using_embeddings():
     client = FakeClient()
     store = Envector.from_texts(

From 78d2ce24d0c2be107a538dd222a174f71dbc1d25 Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Thu, 13 Nov 2025 06:01:44 +0000
Subject: [PATCH 06/13] fix readme

---
 README.md | 47 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 7a231ae..9a616b3 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
   - `python3.11 -m venv .venv && source .venv/bin/activate`
 - Install runtime dependencies:
   - `pip install -U pip setuptools wheel`
-  - `pip install es2==1.1.0rc2 langchain sentence-transformers`
+  - `pip install es2 langchain sentence-transformers`
 
 ## Usage Overview
 1. Configure Envector using `EnvectorConfig`, pointing to your ES2 endpoint and keys.
@@ -27,7 +27,7 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
 Key dataclasses live in `libs/envector/config.py`:
 - `ConnectionConfig`: address or host/port for ES2.
 - `KeyConfig`: key path, key ID, optional preset/eval mode.
-- `IndexSettings`: index name, dimension (16–4096), query encryption mode, optional output fields and fetch parameters.
+- `IndexSettings`: index name, dimension (32–4096), query encryption mode, optional output fields and fetch parameters.
 - `EnvectorConfig`: wraps the above and enables auto-creation via `create_if_missing`.
 
 ## Data Model
@@ -42,15 +42,42 @@ Key dataclasses live in `libs/envector/config.py`:
 - Filtering happens client-side; ensure metadata is JSON for structured filters.
 
 ## Examples
+- Configuration
+
+  ```python
+  from langchain_envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
+
+  cfg = EnvectorConfig(
+      connection=ConnectionConfig(address=ES2_ADDRESS, access_token=ES2_ACCESS_TOKEN),
+      key=KeyConfig(key_path=ES2_KEY_PATH, key_id=ES2_KEY_ID, preset="ip", eval_mode="rmp"),
+      index=IndexSettings(index_name=INDEX_NAME, dim=DIM),
+      create_if_missing=True,
+  )
+  ```
+
 - Add documents (from LangChain Documents):
-  - Python
-    - from langchain_core.documents import Document
-    - docs = [
-        Document(page_content="chunk-1", metadata={"source": "paper.pdf", "page": 1, "chunk": 0}),
-        Document(page_content="chunk-2", metadata={"source": "paper.pdf", "page": 1, "chunk": 1}),
-      ]
-    - store = Envector(config=cfg, embeddings=emb)
-    - store.add_documents(docs)
+
+  ```python
+  from langchain_core.documents import Document
+  from langchain_envector.vectorstore import Envector
+
+  docs = [Document(page_content="chunk-1", metadata={"source": "doc.pdf", "page": 1, "chunk": 0})]
+
+  store = Envector(config=cfg, embeddings=emb)
+  store.add_documents(docs)
+  ```
+
+  The method `add_texts` is also available to store texts.
+
+- Similarity search
+
+  ```python
+  results = store.similarity_search_with_score(query, k=3)
+  for doc, score in results:
+      print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
+  ```
+
+  The methods `similarity_search` and `similarity_search_with_vector` are also available to perform vector search.
 
 ## Troubleshooting
 - Connection issues: verify ES2 address and registered keys.

From 09277ae94ef5dcc705a7355560d4e096eb58dbb3 Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Thu, 13 Nov 2025 06:06:48 +0000
Subject: [PATCH 07/13] fix readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9a616b3..20e4362 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Key dataclasses live in `libs/envector/config.py`:
       print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
   ```
 
-  The methods `similarity_search` and `similarity_search_with_vector` are also available to perform vector search.
+  The methods `similarity_search` and `similarity_search_with_vector` (with `embeddings.embed_query()`) are also available to perform vector search.
 
 ## Troubleshooting
 - Connection issues: verify ES2 address and registered keys.

From f5351b7dc95ea72206b7d3e7ba102bfbf003987d Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Thu, 13 Nov 2025 06:17:03 +0000
Subject: [PATCH 08/13] update pytest in readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 20e4362..14fa220 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ Key dataclasses live in `libs/envector/config.py`:
 ## Testing Without ES2
 - Run unit tests offline (no ES2 or SDK required):
   - `python -m pytest -q -m "not integration"`
-  - or `python run_unit_tests.py`
+  - or `python scripts/run_unit_tests.py`
 - Run integration tests (requires server and keys):
   - Export `ES2_ADDRESS`, `ES2_KEY_PATH`, `ES2_KEY_ID`
   - Optional: `ES2_USE_EMBEDDINGS=1`, `ES2_EMB_MODEL`, `ES2_USE_HF_DATASET=1`

From 057c8bb231066d441cb53a674b6b1b0313373c39 Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Thu, 13 Nov 2025 06:17:34 +0000
Subject: [PATCH 09/13] rm debug log?

---
 libs/envector/langchain_envector/types.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libs/envector/langchain_envector/types.py b/libs/envector/langchain_envector/types.py
index 5ad4591..286a24c 100644
--- a/libs/envector/langchain_envector/types.py
+++ b/libs/envector/langchain_envector/types.py
@@ -57,8 +57,6 @@ def unpack_metadata(raw: Any) -> Dict[str, Any]:
     if isinstance(raw, dict):
         return raw
 
-    print("slafjklshglkhslafhlksadjlghsal;hf")
-
     # Some responses wrap the payload in a single-element list.
     if isinstance(raw, (list, tuple)):
         if len(raw) == 1:

From 430cb45dc13134dc235b403832c62188e146a4a8 Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Thu, 13 Nov 2025 06:18:23 +0000
Subject: [PATCH 10/13] fix python tests path in unit test script

---
 scripts/run_unit_tests.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/run_unit_tests.py b/scripts/run_unit_tests.py
index 3370e56..489698b 100644
--- a/scripts/run_unit_tests.py
+++ b/scripts/run_unit_tests.py
@@ -2,7 +2,13 @@
 
 import importlib
 import inspect
+import sys
 import traceback
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
 
 
 def run_module_tests(module_name: str) -> list[tuple[str, bool, str]]:

From 16ddaeb2be488f69c583f77a49ecbf181b5b6d81 Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Thu, 13 Nov 2025 06:18:54 +0000
Subject: [PATCH 11/13] fix minimum dimension in pytest

---
 tests/integration/test_es2_integration.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_es2_integration.py b/tests/integration/test_es2_integration.py
index a2bf967..03aeb81 100644
--- a/tests/integration/test_es2_integration.py
+++ b/tests/integration/test_es2_integration.py
@@ -73,10 +73,10 @@ def test_e2e_vectorstore_plain_and_cipher():
             except Exception as e:
                 pytest.skip(f"Embeddings requested but unavailable: {e}")
     else:
-        dim = int(dim_env or "16")
+        dim = int(dim_env or "32")
 
-    if dim < 16 or dim > 4096:
-        pytest.skip("Envector supports dimensions in [16, 4096]")
+    if dim < 32 or dim > 4096:
+        pytest.skip("Envector supports dimensions in [32, 4096]")
 
     base_index_name = os.environ.get(
         "ES2_INDEX_NAME", f"inttest_{secrets.token_hex(4)}"

From ef527eaa92692977248e980f8351b4a7c1c3befc Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Mon, 8 Dec 2025 00:59:45 +0000
Subject: [PATCH 12/13] update renamed sdk

---
 .github/workflows/release.yaml                |  2 +-
 .gitignore                                    |  4 +-
 CONTRIBUTE.md                                 |  8 +--
 README.md                                     | 28 +++++-----
 libs/envector/README.md                       |  6 +--
 libs/envector/examples/basic_usage.py         |  2 +-
 libs/envector/examples/ingest_synthetic_1k.py |  2 +-
 libs/envector/langchain_envector/__init__.py  |  2 +-
 libs/envector/langchain_envector/client.py    | 32 ++++++------
 libs/envector/langchain_envector/types.py     |  8 +--
 .../langchain_envector/vectorstore.py         | 12 ++---
 pyproject.toml                                |  8 +--
 pytest.ini                                    |  2 +-
 tests/integration/test_es2_integration.py     | 52 +++++++++----------
 tests/requirements.txt                        |  4 +-
 15 files changed, 85 insertions(+), 87 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 24705f5..78e880d 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -192,7 +192,7 @@ jobs:
           MAX_ATTEMPTS=30
           SLEEP_SECONDS=10
 
-          echo "Attempting to install es2==${WHEEL_VERSION} from TestPyPI..."
+          echo "Attempting to install pyenvector==${WHEEL_VERSION} from TestPyPI..."
           ATTEMPTS=0
           while true; do
             ATTEMPTS=$((ATTEMPTS + 1))
diff --git a/.gitignore b/.gitignore
index eca98cc..cfda40f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,10 +39,8 @@ keys/
 VECTORSTORE.md
 
 # External symlinks (local workspace references)
-es2-msa
 es2-msa/
-es2-deploy
-es2-deploy/
+envector-deployment/
 
 # Local helper scripts
 run_unit_tests.py
diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
index a2e8f13..6007c29 100644
--- a/CONTRIBUTE.md
+++ b/CONTRIBUTE.md
@@ -13,16 +13,16 @@ Thanks for your interest in improving the project! This guide covers local setup
 
 ## Testing
 - **Unit tests** (fakes only): `python run_unit_tests.py`
-- **Integration tests** (requires ES2 server + keys):
-  - Export `ES2_ADDRESS`, `ES2_KEY_PATH`, `ES2_KEY_ID`
-  - Optional: `ES2_USE_EMBEDDINGS=1`, `ES2_EMB_MODEL`, `ES2_USE_HF_DATASET=1`
+- **Integration tests** (requires EnVector server + keys):
+  - Export `ENVECTOR_ADDRESS`, `ENVECTOR_KEY_PATH`, `ENVECTOR_KEY_ID`
+  - Optional: `ENVECTOR_USE_EMBEDDINGS=1`, `ENVECTOR_EMB_MODEL`, `ENVECTOR_USE_HF_DATASET=1`
   - Run `pytest -m integration -s`
 
 Please run relevant tests before submitting a PR and mention coverage in the description.
 
 ## Development Guidelines
 - Keep code, comments, and docs in English.
-- Prefer the high-level `es2` SDK APIs; avoid direct gRPC/indexer calls unless required.
+- Prefer the high-level `pyenvector` SDK APIs; avoid direct gRPC/indexer calls unless required.
 - Keep changes focused and documented; update README or notebooks when behavior changes.
 - Follow existing formatting and type-hint conventions.
 
diff --git a/README.md b/README.md
index ac6fea8..393e18d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # LangChain Envector Integration
 
-Encrypted vector search for LangChain using Envector (ES2), powered by homomorphic encryption (CKKS). This repo ships a LangChain-compatible VectorStore and retriever utilities built on the high-level `es2` Python SDK.
+Encrypted vector search for LangChain using Envector, powered by homomorphic encryption (CKKS). This repo ships a LangChain-compatible VectorStore and retriever utilities built on the high-level `pyenvector` Python SDK.
 
 ## Features
 - LangChain `VectorStore` interface with `similarity_search`, `from_texts`, etc.
@@ -13,10 +13,10 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
   - `python3.11 -m venv .venv && source .venv/bin/activate`
 - Install runtime dependencies:
   - `pip install -U pip setuptools wheel`
-  - `pip install es2 langchain sentence-transformers`
+  - `pip install pyenvector langchain sentence-transformers`
 
 ## Usage Overview
-1. Configure Envector using `EnvectorConfig`, pointing to your ES2 endpoint and keys.
+1. Configure Envector using `EnvectorConfig`, pointing to your EnVector endpoint and keys.
 2. Initialize embeddings (or provide pre-computed vectors).
 3. Instantiate `Envector(config=cfg, embeddings=emb)` and call `add_texts`, `add_documents`, or use `as_retriever`.
 4. Run `similarity_search` or plug the retriever into your LangChain pipeline.
@@ -25,13 +25,13 @@ Encrypted vector search for LangChain using Envector (ES2), powered by homomorph
 
 ## Configuration
 Key dataclasses live in `libs/envector/config.py`:
-- `ConnectionConfig`: address or host/port for ES2.
+- `ConnectionConfig`: address or host/port for EnVector.
 - `KeyConfig`: key path, key ID, optional preset/eval mode.
 - `IndexSettings`: index name, dimension (32–4096), query encryption mode, optional output fields and fetch parameters.
 - `EnvectorConfig`: wraps the above and enables auto-creation via `create_if_missing`.
 
 ## Data Model
-- Each vector stores a single `metadata` string in ES2.
+- Each vector stores a single `metadata` string in EnVector.
 - To align with LangChain’s `Document`, inserts wrap data as JSON: `{"text": ..., "metadata": ...}`.
 - Retrieval unwraps JSON, returning `Document(page_content=text, metadata={...})`.
 - Client-side filtering requires the JSON envelope to include an object under `metadata`.
@@ -48,12 +48,12 @@ Key dataclasses live in `libs/envector/config.py`:
 
   cfg = EnvectorConfig(
       connection=ConnectionConfig(
-        address=ES2_ADDRESS, 
-        access_token=ES2_ACCESS_TOKEN
+        address=ENVECTOR_ADDRESS, 
+        access_token=ENVECTOR_ACCESS_TOKEN
       ),
       key=KeyConfig(
-        key_path=ES2_KEY_PATH, 
-        key_id=ES2_KEY_ID, 
+        key_path=ENVECTOR_KEY_PATH, 
+        key_id=ENVECTOR_KEY_ID, 
         preset="ip", 
         eval_mode="rmp"
       ),
@@ -100,18 +100,18 @@ Key dataclasses live in `libs/envector/config.py`:
   The methods `similarity_search` and `similarity_search_with_vector` (with `embeddings.embed_query()`) are also available to perform vector search.
 
 ## Troubleshooting
-- Connection issues: verify ES2 address and registered keys.
+- Connection issues: verify EnVector address and registered keys.
 - Embeddings mismatch: ensure embedding dimension equals `index.dim` when supplying vectors.
 - Unexpected raw strings: confirm inserts used the JSON envelope.
 - Key Issues: check key's metadata to sync with the registered key if facing any key issue.
 
-## Testing Without ES2
-- Run unit tests offline (no ES2 or SDK required):
+## Testing Without EnVector
+- Run unit tests offline (no EnVector or SDK required):
   - `python -m pytest -q -m "not integration"`
   - or `python scripts/run_unit_tests.py`
 - Run integration tests (requires server and keys):
-  - Export `ES2_ADDRESS`, `ES2_KEY_PATH`, `ES2_KEY_ID`
-  - Optional: `ES2_USE_EMBEDDINGS=1`, `ES2_EMB_MODEL`, `ES2_USE_HF_DATASET=1`
+  - Export `ENVECTOR_ADDRESS`, `ENVECTOR_KEY_PATH`, `ENVECTOR_KEY_ID`
+  - Optional: `ENVECTOR_USE_EMBEDDINGS=1`, `ENVECTOR_EMB_MODEL`, `ENVECTOR_USE_HF_DATASET=1`
   - `python -m pytest -q -m integration -s`
 
 ## Contributing
diff --git a/libs/envector/README.md b/libs/envector/README.md
index 0ff389d..a9323f2 100644
--- a/libs/envector/README.md
+++ b/libs/envector/README.md
@@ -1,15 +1,15 @@
 # Envector (LangChain VectorStore)
 
-High-level VectorStore adaptor for Envector (ES2), using the `es2` SDK. Vectors are always encrypted on the server; the SDK performs required crypto client-side.
+High-level VectorStore adaptor for Envector, using the `pyenvector` SDK. Vectors are always encrypted on the server; the SDK performs required crypto client-side.
 
 Key points
-- Use high-level `es2.ES2` and `es2.Index`; avoid low-level `es2.api.Indexer`/gRPC.
+- Use high-level `pyenvector.EnvectorClient` and `pyenvector.Index`; avoid low-level `pyenvector.api.Indexer`/gRPC.
 - Index encryption is fixed to `cipher`. Query can be `plain` or `cipher`.
 - Metadata is stored as a single JSON string per item: `{id, text, metadata}`.
 
 Files
 - `config.py`: Configuration dataclasses (connection, key, index).
-- `client.py`: Initializes ES2 + index and returns an `Index` instance.
+- `client.py`: Initializes EnVector + index and returns an `Index` instance.
 - `vectorstore.py`: `Envector` VectorStore implementation.
 - `retriever.py`: Optional wrapper retriever.
 - `examples/`: Minimal examples.
diff --git a/libs/envector/examples/basic_usage.py b/libs/envector/examples/basic_usage.py
index d70b61b..88eeb47 100644
--- a/libs/envector/examples/basic_usage.py
+++ b/libs/envector/examples/basic_usage.py
@@ -1,7 +1,7 @@
 """Basic usage example for Envector VectorStore.
 
 Requirements:
-- `es2`
+- `pyenvector`
 - `langchain` (version providing VectorStore APIs)
 - An embeddings backend, e.g. sentence-transformers
 """
diff --git a/libs/envector/examples/ingest_synthetic_1k.py b/libs/envector/examples/ingest_synthetic_1k.py
index 8b055c8..ec92a7d 100644
--- a/libs/envector/examples/ingest_synthetic_1k.py
+++ b/libs/envector/examples/ingest_synthetic_1k.py
@@ -1,7 +1,7 @@
 """Ingest the synthetic 1K dataset into Envector.
 
 Requires:
-- ES2 server and keys.
+- EnVector server and keys.
 - Dataset at `data/synthetic_rag_1k.jsonl` (run scripts/make_synthetic_rag_dataset.py).
 
 Usage:
diff --git a/libs/envector/langchain_envector/__init__.py b/libs/envector/langchain_envector/__init__.py
index cf7a9b6..0567c2e 100644
--- a/libs/envector/langchain_envector/__init__.py
+++ b/libs/envector/langchain_envector/__init__.py
@@ -1,6 +1,6 @@
 """Envector LangChain integration package.
 
-Provides a LangChain-compatible VectorStore that wraps the high-level `es2` SDK.
+Provides a LangChain-compatible VectorStore that wraps the high-level `pyenvector` SDK.
 All code and comments are in English as per project rules.
 """
 
diff --git a/libs/envector/langchain_envector/client.py b/libs/envector/langchain_envector/client.py
index c3ac2f9..930913d 100644
--- a/libs/envector/langchain_envector/client.py
+++ b/libs/envector/langchain_envector/client.py
@@ -4,45 +4,45 @@
 
 
 class EnvectorClient:
-    """Thin convenience client around the high-level `es2` SDK.
+    """Thin convenience client around the high-level `pyenvector` SDK.
 
     - Establishes a connection
     - Initializes key and index configuration
     - Optionally creates the index if missing
-    - Provides access to the ES2 `Index` instance
+    - Provides access to the pyenvector `Index` instance
     """
 
     def __init__(self, config: EnvectorConfig):
         self.config = config
-        self._es2 = None
+        self._ev = None
         self._index = None
 
     def init(self):
-        import es2
+        import pyenvector as ev
 
         c = self.config.connection
         k = self.config.key
         i = self.config.index
 
-        es2_client = es2.ES2()
+        ev_client = ev.EnvectorClient()
 
         # Connection
         if c.address:
-            es2_client.init_connect(address=c.address, access_token=c.access_token)
+            ev_client.init_connect(address=c.address, access_token=c.access_token)
         else:
             if not (c.host and c.port):
                 raise ValueError("Either address or host+port must be provided.")
-            es2_client.init_connect(
+            ev_client.init_connect(
                 host=c.host, port=c.port, access_token=c.access_token
             )
 
         # Key path baseline for Index
-        from es2.index import Index as _Index
+        from pyenvector.index import Index as _Index
 
         _Index.init_key_path(k.key_path)
 
         # Index config + key setup
-        es2_client.init_index_config(
+        ev_client.init_index_config(
             index_name=i.index_name,
             dim=i.dim,
             key_path=k.key_path,
@@ -59,13 +59,13 @@ def init(self):
 
         # Create index if missing
         if self.config.create_if_missing:
-            idx_list = es2_client.get_index_list()
+            idx_list = ev_client.get_index_list()
             if i.index_name not in idx_list:
-                es2_client.create_index(index_name=i.index_name, dim=i.dim)
+                ev_client.create_index(index_name=i.index_name, dim=i.dim)
 
         # Bind index instance
-        self._index = es2.Index(i.index_name)
-        self._es2 = es2_client
+        self._index = ev.Index(i.index_name)
+        self._ev = ev_client
         return self
 
     @property
@@ -75,7 +75,7 @@ def index(self):
         return self._index
 
     @property
-    def es2(self):
-        if self._es2 is None:
+    def ev(self):
+        if self._ev is None:
             raise RuntimeError("Client not initialized. Call init().")
-        return self._es2
+        return self._ev
diff --git a/libs/envector/langchain_envector/types.py b/libs/envector/langchain_envector/types.py
index 286a24c..99d4a73 100644
--- a/libs/envector/langchain_envector/types.py
+++ b/libs/envector/langchain_envector/types.py
@@ -29,9 +29,9 @@ class SearchResult:
 
 
 def pack_metadata(text: str, metadata: Optional[Dict[str, Any]] = None) -> str:
-    """Pack text and metadata into a single JSON string field accepted by ES2.
+    """Pack text and metadata into a single JSON string field accepted by pyenvector.
 
-    ES2 metadata API stores lists of strings; we store a single JSON blob per item.
+    pyenvector metadata API stores lists of strings; we store a single JSON blob per item.
     Item-level IDs are not persisted/addressable.
     """
     import json
@@ -46,7 +46,7 @@ def pack_metadata(text: str, metadata: Optional[Dict[str, Any]] = None) -> str:
 def unpack_metadata(raw: Any) -> Dict[str, Any]:
     """Return metadata as a dict regardless of the raw payload type.
 
-    Recent ES2 versions may return decrypted metadata as a Python dict instead
+    Recent pyenvector versions may return decrypted metadata as a Python dict instead
     of the JSON string we originally stored. We normalise the payload here so
     downstream code always works with a dictionary.
     """
@@ -79,7 +79,7 @@ def unpack_metadata(raw: Any) -> Dict[str, Any]:
             if isinstance(data, dict):
                 return data
         except Exception:
-            # Some ES2 responses return Python-literal strings (single quotes).
+            # Some pyenvector responses return Python-literal strings (single quotes).
             try:
                 import ast
 
diff --git a/libs/envector/langchain_envector/vectorstore.py b/libs/envector/langchain_envector/vectorstore.py
index eef1cb5..67bccbf 100644
--- a/libs/envector/langchain_envector/vectorstore.py
+++ b/libs/envector/langchain_envector/vectorstore.py
@@ -38,10 +38,10 @@ def __init__(
 
 
 class Envector(VectorStore):  # type: ignore[misc]
-    """LangChain-compatible VectorStore adaptor for Envector (ES2).
+    """LangChain-compatible VectorStore adaptor for Envector.
 
-    This class wraps the high-level `es2` SDK. It does not use low-level
-    gRPC stubs or `es2.api.Indexer` directly.
+    This class wraps the high-level `pyenvector` SDK. It does not use low-level
+    gRPC stubs or `pyenvector.api.Indexer` directly.
     """
 
     def __init__(
@@ -89,7 +89,7 @@ def add_texts(
         # Prepare metadata JSON strings per item
         packed = [pack_metadata(t, m) for t, m in zip(texts, metadatas)]
 
-        # Insert using high-level ES2 Index
+        # Insert using high-level pyenvector Index
         result_ids = self.client.index.insert(data=vectors, metadata=packed)
 
         # Return ephemeral placeholders to satisfy VectorStore interface,
@@ -111,7 +111,7 @@ def _similarity_search_with_scores(
         results = self.client.index.search(
             query=embedding, top_k=top_k, output_fields=self.config.index.output_fields
         )
-        # ES2 Index.search returns a list for each query; we passed single query
+        # pyenvector Index.search returns a list per query; we passed a single query
         result = (
             results[0]
             if isinstance(results, list) and results and isinstance(results[0], list)
@@ -265,7 +265,7 @@ def add_documents(
         extracting `page_content` and `metadata` from each Document.
 
         Notes:
-        - Manual `ids` are ignored (ES2 does not support user-provided IDs).
+        - Manual `ids` are ignored (EnVector does not support user-provided IDs).
         - When `embeddings` is not configured, you must supply `vectors`.
         - Returns ephemeral IDs as produced by the client insert.
         """
diff --git a/pyproject.toml b/pyproject.toml
index 2e21520..acb83b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,8 +7,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "langchain-envector"
-version = "0.1.2"
-description = "LangChain VectorStore integration for Envector (ES2) encrypted vector search"
+version = "0.1.3"
+description = "LangChain VectorStore integration for Envector"
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.9,<3.14"
@@ -16,10 +16,10 @@ authors = [
   { name = "Envector Contributors" }
 ]
 dependencies = [
-  "es2",
+  "pyenvector",
   "langchain>=0.2.0",
 ]
-keywords = ["langchain", "vectorstore", "homomorphic-encryption", "ckks", "encrypted-search", "envector", "es2"]
+keywords = ["langchain", "vectorstore", "homomorphic-encryption", "ckks", "encrypted-search", "envector", "pyenvector"]
 classifiers = [
   "Programming Language :: Python :: 3",
   "Programming Language :: Python :: 3.9",
diff --git a/pytest.ini b/pytest.ini
index 6d12c43..70b79b9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,6 +1,6 @@
 [pytest]
 markers =
-    integration: tests that require a running ES2 server and the real es2 SDK
+    integration: tests that require a running EnVector server and the real pyenvector SDK
 testpaths =
     tests
 
diff --git a/tests/integration/test_es2_integration.py b/tests/integration/test_es2_integration.py
index 8f5fca1..8a62b30 100644
--- a/tests/integration/test_es2_integration.py
+++ b/tests/integration/test_es2_integration.py
@@ -25,35 +25,35 @@ def _require_env(name: str) -> str:
 
 
 @pytest.mark.skipif(
-    os.environ.get("ES2_ADDRESS") is None,
-    reason="Set ES2_ADDRESS (e.g., 0.0.0.0:50050) to enable ES2 integration tests",
+    os.environ.get("ENVECTOR_ADDRESS") is None,
+    reason="Set ENVECTOR_ADDRESS (e.g., 0.0.0.0:50050) to enable Envector integration tests",
 )
 def test_e2e_vectorstore_plain_and_cipher():
     try:
-        import es2  # type: ignore
+        import pyenvector  # type: ignore
     except Exception as e:  # pragma: no cover - env-dependent
-        pytest.skip(f"es2 SDK not available: {e}")
+        pytest.skip(f"pyenvector SDK not available: {e}")
 
-    address = _require_env("ES2_ADDRESS")
-    key_path = _require_env("ES2_KEY_PATH")
-    key_id = _require_env("ES2_KEY_ID")
-    use_emb = os.environ.get("ES2_USE_EMBEDDINGS") in {"1", "true", "TRUE", "yes"}
+    address = _require_env("ENVECTOR_ADDRESS")
+    key_path = _require_env("ENVECTOR_KEY_PATH")
+    key_id = _require_env("ENVECTOR_KEY_ID")
+    use_emb = os.environ.get("ENVECTOR_USE_EMBEDDINGS") in {"1", "true", "TRUE", "yes"}
     model_name = os.environ.get(
-        "ES2_EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
+        "ENVECTOR_EMB_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
     )
-    use_hf = os.environ.get("ES2_USE_HF_DATASET") in {"1", "true", "TRUE", "yes"}
-    hf_name = os.environ.get("ES2_HF_NAME", "ag_news")
-    hf_subset = os.environ.get("ES2_HF_SUBSET")
-    hf_split = os.environ.get("ES2_HF_SPLIT", "train")
-    hf_text_col = os.environ.get("ES2_HF_TEXT_COL", "text")
+    use_hf = os.environ.get("ENVECTOR_USE_HF_DATASET") in {"1", "true", "TRUE", "yes"}
+    hf_name = os.environ.get("ENVECTOR_HF_NAME", "ag_news")
+    hf_subset = os.environ.get("ENVECTOR_HF_SUBSET")
+    hf_split = os.environ.get("ENVECTOR_HF_SPLIT", "train")
+    hf_text_col = os.environ.get("ENVECTOR_HF_TEXT_COL", "text")
     hf_meta_cols = [
-        c for c in os.environ.get("ES2_HF_META_COLS", "label").split(",") if c
+        c for c in os.environ.get("ENVECTOR_HF_META_COLS", "label").split(",") if c
     ]
-    hf_size = int(os.environ.get("ES2_HF_SIZE", "200"))
-    hf_seed = int(os.environ.get("ES2_HF_SEED", "42"))
+    hf_size = int(os.environ.get("ENVECTOR_HF_SIZE", "200"))
+    hf_seed = int(os.environ.get("ENVECTOR_HF_SEED", "42"))
 
     # Determine dimension: either from env, or from embeddings model, or default
-    dim_env = os.environ.get("ES2_DIM")
+    dim_env = os.environ.get("ENVECTOR_DIM")
     if use_emb:
         emb = None
         # Prefer LangChain embeddings if available, else fall back to sentence-transformers
@@ -79,13 +79,13 @@ def test_e2e_vectorstore_plain_and_cipher():
         pytest.skip("Envector supports dimensions in [32, 4096]")
 
     base_index_name = os.environ.get(
-        "ES2_INDEX_NAME", f"inttest_{secrets.token_hex(4)}"
+        "ENVECTOR_INDEX_NAME", f"inttest_{secrets.token_hex(4)}"
     )
 
-    import es2
+    import pyenvector as ev
 
-    es2.init_connect(address=address)
-    es2.reset()
+    ev.init_connect(address=address)
+    ev.reset()
 
     # Plain query mode
     cfg_plain = EnvectorConfig(
@@ -205,8 +205,8 @@ def test_e2e_vectorstore_plain_and_cipher():
         assert all("_id" in d.metadata for d in docs_cc)
 
     # Cleanup
-    store_plain.client.es2.init_connect(address=address)
-    store_plain.client.es2.drop_index(cfg_plain.index.index_name)
+    store_plain.client.ev.init_connect(address=address)
+    store_plain.client.ev.drop_index(cfg_plain.index.index_name)
 
-    store_cc.client.es2.init_connect(address=address)
-    store_cc.client.es2.drop_index(cfg_cc.index.index_name)
+    store_cc.client.ev.init_connect(address=address)
+    store_cc.client.ev.drop_index(cfg_cc.index.index_name)
diff --git a/tests/requirements.txt b/tests/requirements.txt
index d51cc17..02b0ade 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -3,9 +3,9 @@
 # Test runner
 pytest
 
-# ES2 SDK (encrypted vector search) — install from local wheel at repo root
+# pyenvector SDK — install from local wheel at repo root
 # Use a direct wheel path (no PEP 508 direct reference) for maximum pip compatibility.
-./es2-1.0.3rc7-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
+./pyenvector-1.2.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
 
 # Note: LangChain is optional for tests. Integration tests will fall back to
 # sentence-transformers if LangChain embeddings are unavailable.

From 21f0607921c2c882834b0e3944a9a5deb73f1b76 Mon Sep 17 00:00:00 2001
From: suyeong <suyeong@cryptolab.co.kr>
Date: Mon, 8 Dec 2025 01:00:02 +0000
Subject: [PATCH 13/13] precommit: rm try-catch import sdk

---
 tests/integration/test_es2_integration.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/integration/test_es2_integration.py b/tests/integration/test_es2_integration.py
index 8a62b30..a7272fd 100644
--- a/tests/integration/test_es2_integration.py
+++ b/tests/integration/test_es2_integration.py
@@ -29,11 +29,6 @@ def _require_env(name: str) -> str:
     reason="Set ENVECTOR_ADDRESS (e.g., 0.0.0.0:50050) to enable Envector integration tests",
 )
 def test_e2e_vectorstore_plain_and_cipher():
-    try:
-        import pyenvector  # type: ignore
-    except Exception as e:  # pragma: no cover - env-dependent
-        pytest.skip(f"pyenvector SDK not available: {e}")
-
     address = _require_env("ENVECTOR_ADDRESS")
     key_path = _require_env("ENVECTOR_KEY_PATH")
     key_id = _require_env("ENVECTOR_KEY_ID")