CryptoLabInc · euphoria0-0 · Jan 2, 2026 · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -27,6 +27,7 @@ jobs:
           python -m pip install --upgrade pip setuptools wheel
           python -m pip install -e .
           python -m pip install pytest pre-commit
+          python -m pip install langchain-tests
 
       - name: Lint and format
         run: pre-commit run --all-files --show-diff-on-failure

diff --git a/README.md b/README.md
@@ -42,8 +42,9 @@ Key dataclasses live in `libs/envector/config.py`:
 - Filtering happens client-side; ensure metadata is JSON for structured filters.
 
 ## Examples
-- Configuration
-  ```python
+### Configuration
+
+```python
   from langchain_envector.config import ConnectionConfig, EnvectorConfig, IndexSettings, KeyConfig
 
   cfg = EnvectorConfig(
@@ -66,53 +67,109 @@ Key dataclasses live in `libs/envector/config.py`:
   )
   ```
 
-- Add documents (from LangChain Documents):
-
-  ```python
-  from langchain_core.documents import Document
-  from langchain_envector.vectorstore import Envector
-
-  docs = [
-    Document(
-      page_content="chunk-1", 
-      metadata={"source": "paper.pdf", "page": 1, "chunk": 0}
-    ),
-    Document(
-      page_content="chunk-2", 
-      metadata={"source": "paper.pdf", "page": 1, "chunk": 1}
-    ),
-  ]
-
-  store = Envector(config=cfg, embeddings=emb)
-  store.add_documents(docs)
-  ```
+### Add documents (from LangChain Documents)
 
-  The method `add_texts` is also available to store texts.
+```python
+from langchain_core.documents import Document
+from langchain_envector.vectorstore import Envector
 
-- Similarity search
+docs = [
+  Document(
+    page_content="chunk-1", 
+    metadata={"source": "paper.pdf", "page": 1, "chunk": 0}
+  ),
+  Document(
+    page_content="chunk-2", 
+    metadata={"source": "paper.pdf", "page": 1, "chunk": 1}
+  ),
+]
 
-  ```python
-  results = store.similarity_search_with_score(query, k=3)
-  for doc, score in results:
-      print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
-  ```
+store = Envector(config=cfg, embeddings=emb)
+store.add_documents(docs)
+```
+
+Alternatively, use `add_texts` to store vectors together with their texts:
+
+```python
+store.add_texts(
+    texts=["chunk 3"],
+    metadatas=[{"source": "paper.pdf", "page": 1, "chunk": 2}]
+)
+```
+
+### Similarity search
+
+```python
+results = store.similarity_search(query, k=1)
+for doc in results:
+    print(f"* {doc.page_content} [{doc.metadata}]")
+```
+
+#### Similarity Search with Score
+
+```python
+results = store.similarity_search_with_score(query, k=1)
+for doc, score in results:
+    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")
+```
+
+
+#### Similarity Search with Vector
+
+```python
+query_embedding = emb.embed_query(query)
+print(f"Query: {query_embedding[:3]}")
+results = store.similarity_search_by_vector(query_embedding, k=3)
+for doc in results:
+    print(f"* {doc.page_content} [{doc.metadata}]")
+```
 
-  The methods `similarity_search` and `similarity_search_with_vector` (with `embeddings.embed_query()`) are also available to perform vector search.
 
 ## Troubleshooting
 - Connection issues: verify EnVector address and registered keys.
 - Embeddings mismatch: ensure embedding dimension equals `index.dim` when supplying vectors.
 - Unexpected raw strings: confirm inserts used the JSON envelope.
 - Key Issues: check key's metadata to sync with the registered key if facing any key issue.
 
-## Testing Without EnVector
-- Run unit tests offline (no EnVector or SDK required):
-  - `python -m pytest -q -m "not integration"`
-  - or `python scripts/run_unit_tests.py`
-- Run integration tests (requires server and keys):
-  - Export `ENVECTOR_ADDRESS`, `ENVECTOR_KEY_PATH`, `ENVECTOR_KEY_ID`
-  - Optional: `ENVECTOR_USE_EMBEDDINGS=1`, `ENVECTOR_EMB_MODEL`, `ENVECTOR_USE_HF_DATASET=1`
-  - `python -m pytest -q -m integration -s`
+## Test
+
+Before running tests, install dependencies for pytest:
+
+```bash
+pip install -r tests/requirements.txt
+```
+
+### Unit Test
+
+Run unit tests offline (no EnVector or SDK required):
+
+```bash
+python -m pytest -q -m "not integration"
+# or
+python scripts/run_unit_tests.py
+```
+
+### Integration Test
+
+Run integration tests (requires a running enVector server):
+
+1. Make sure an enVector server is running.
+
+2. Export the environment variables:
+
+   - `ENVECTOR_ADDRESS`
+   - `ENVECTOR_KEY_PATH`
+   - `ENVECTOR_KEY_ID`
+   - `ENVECTOR_INDEX_NAME`
+   - (Optional) `ENVECTOR_USE_EMBEDDINGS=1`
+   - (Optional) `ENVECTOR_EMB_MODEL`
+   - (Optional) `ENVECTOR_USE_HF_DATASET=1`
+
+3. Run the following command:
+
+```bash
+python -m pytest -q -m integration -s
+```
 
 ## Contributing
 See [`CONTRIBUTE.md`](CONTRIBUTE.md) for development, testing, and PR guidelines.
diff --git a/libs/envector/langchain_envector/vectorstore.py b/libs/envector/langchain_envector/vectorstore.py
@@ -118,12 +118,18 @@ def _similarity_search_with_scores(
             else results
         )
 
+        if not result:
+            return []
+
         docs_with_scores: List[Tuple[Document, float]] = []
         # Iterate from top-1 to top-k
         for item in result:
             # item = {"id": ..., "score": float, "metadata": [str] or {...}}
             score = float(item.get("score", 0.0))
             md_obj_raw = item.get("metadata")
+            if md_obj_raw in (None, "", [], {}):
+                # Skip placeholder/empty hits returned by the backend.
+                continue
 
             # Metadata encryption/decryption is handled by the SDK.
             # Envector currently supports a single associated data field (string).
@@ -133,6 +139,9 @@ def _similarity_search_with_scores(
 
             text = md_obj.get("text", "") if "_raw" not in md_obj else md_obj["_raw"]
             metadata = md_obj.get("metadata", {}) if "_raw" not in md_obj else {}
+            if not text and not metadata:
+                # Treat empty text+metadata as no result.
+                continue
 
             # client-side filter
             if filter:
@@ -143,9 +152,11 @@ def _similarity_search_with_scores(
             if score_threshold is not None and score < score_threshold:
                 continue
 
+            doc_id = item.get("id")
             doc = Document(
                 page_content=text,
                 metadata={**metadata, "_score": score, "_id": item.get("id")},
+                id=doc_id,
             )
             docs_with_scores.append((doc, score))
 
@@ -181,7 +192,16 @@ def similarity_search(
             fetch_k=fetch_k,
             **kwargs,
         )
-        return [doc for doc, _ in docs_with_scores]
+        return [
+            Document(
+                page_content=doc.page_content,
+                metadata={
+                    k: v for k, v in doc.metadata.items() if k not in ("_score", "_id")
+                },
+                id=getattr(doc, "id", None),
+            )
+            for doc, _ in docs_with_scores
+        ]
 
     def similarity_search_with_score(
         self,

diff --git a/pytest.ini b/pytest.ini
@@ -1,6 +1,6 @@
 [pytest]
 markers =
     integration: tests that require a running EnVector server and the real EnVector SDK
+asyncio_mode = auto
 testpaths =
     tests
-
diff --git a/tests/integration/test_es2_integration.py → tests/integration_tests/test_e2e.py b/tests/integration/test_es2_integration.py → tests/integration_tests/test_e2e.py
@@ -136,7 +136,7 @@ def test_e2e_vectorstore_plain_and_cipher():
                 (d.page_content[:80] + ("..." if len(d.page_content) > 80 else "")),
             )
         assert len(docs) >= 1
-        assert all("_id" in d.metadata for d in docs)
+        assert all(getattr(d, "id", None) or "_id" in d.metadata for d in docs)
         # optional filter check if 'label' is part of meta
         if not use_hf:
             docs_f = store_plain.similarity_search(
@@ -153,7 +153,7 @@ def test_e2e_vectorstore_plain_and_cipher():
             "[plain] results (explicit embedding e1):", [d.page_content for d in docs]
         )
         assert any(d.page_content == texts[0] for d in docs)
-        assert all("_id" in d.metadata for d in docs)
+        assert all(getattr(d, "id", None) or "_id" in d.metadata for d in docs)
         docs_f = store_plain.similarity_search(
             "q", k=2, embedding=e2, filter={"label": "B"}
         )
@@ -189,15 +189,15 @@ def test_e2e_vectorstore_plain_and_cipher():
                 (d.page_content[:80] + ("..." if len(d.page_content) > 80 else "")),
             )
         assert len(docs_cc) >= 1
-        assert all("_id" in d.metadata for d in docs_cc)
+        assert all(getattr(d, "id", None) or "_id" in d.metadata for d in docs_cc)
     else:
         docs_cc = store_cc.similarity_search("q", k=2, embedding=e2)
         print(
             "[cipher] results (explicit embedding e2):",
             [d.page_content for d in docs_cc],
         )
         assert any(d.page_content == texts[1] for d in docs_cc)
-        assert all("_id" in d.metadata for d in docs_cc)
+        assert all(getattr(d, "id", None) or "_id" in d.metadata for d in docs_cc)
 
     # Cleanup
     store_plain.client.ev.init_connect(address=address)