From 9b3af00d8df0b027b1f99e1bd2e8e7693bc130a1 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 11:57:04 -0300 Subject: [PATCH 01/17] chore: ignore .worktrees/ directory Used by superpowers workflows to host isolated git worktrees during implementation, never meant to be tracked. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cd448815..2aa97b8e 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,6 @@ codegen.log Brewfile.lock.json .ipynb_checkpoints -.DS_Store \ No newline at end of file +.DS_Store + +.worktrees/ \ No newline at end of file From b2e421809ec6852d712e41a5906763575ecae0b2 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:03:19 -0300 Subject: [PATCH 02/17] feat(tracing): add StepType.EMBEDDING enum value (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/openlayer/lib/tracing/enums.py | 1 + tests/test_tracing_core.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/src/openlayer/lib/tracing/enums.py b/src/openlayer/lib/tracing/enums.py index ee483b7a..e99d4b72 100644 --- a/src/openlayer/lib/tracing/enums.py +++ b/src/openlayer/lib/tracing/enums.py @@ -8,6 +8,7 @@ class StepType(enum.Enum): AGENT = "agent" CHAT_COMPLETION = "chat_completion" + EMBEDDING = "embedding" GUARDRAIL = "guardrail" HANDOFF = "handoff" RETRIEVER = "retriever" diff --git a/tests/test_tracing_core.py b/tests/test_tracing_core.py index 99e2b18a..8dd2ec88 100644 --- a/tests/test_tracing_core.py +++ b/tests/test_tracing_core.py @@ -812,3 +812,13 @@ def my_func(tool_count: int) -> None: # "tool_count" exists in inputs, so it should be promoted from there my_func(tool_count=5) + + +class TestEmbeddingStep: + """Tests for StepType.EMBEDDING and the add_embedding_step_to_trace helper.""" + + def test_step_type_embedding_exists(self) -> None: + """StepType.EMBEDDING must exist with the expected string value.""" + from openlayer.lib.tracing.enums import StepType + + assert StepType.EMBEDDING.value == "embedding" From 9d7887a2f4b1a8a8bc60f06011e3b68384bc4e56 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:04:44 -0300 Subject: [PATCH 03/17] feat(tracing): add add_embedding_step_to_trace helper (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- src/openlayer/lib/tracing/tracer.py | 9 +++++ tests/test_tracing_core.py | 53 +++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/openlayer/lib/tracing/tracer.py b/src/openlayer/lib/tracing/tracer.py index baea887f..ad18c80c 100644 --- a/src/openlayer/lib/tracing/tracer.py +++ b/src/openlayer/lib/tracing/tracer.py @@ -518,6 +518,15 @@ def add_chat_completion_step_to_trace(**kwargs) -> None: step.log(**kwargs) +def add_embedding_step_to_trace(**kwargs) -> None: + """Adds an embedding step to the trace.""" + with create_step( + step_type=enums.StepType.EMBEDDING, + name=kwargs.get("name", "Embedding"), + ) as step: + step.log(**kwargs) + + def trace( *step_args, inference_pipeline_id: Optional[str] = None, diff --git a/tests/test_tracing_core.py b/tests/test_tracing_core.py index 8dd2ec88..a8984fb7 100644 --- a/tests/test_tracing_core.py +++ b/tests/test_tracing_core.py @@ -822,3 +822,56 @@ def test_step_type_embedding_exists(self) -> None: from openlayer.lib.tracing.enums import StepType assert StepType.EMBEDDING.value == "embedding" + + def test_add_embedding_step_to_trace_creates_step_with_correct_type(self) -> None: + """The helper must create a step with StepType.EMBEDDING and forward kwargs to step.log.""" + from unittest.mock import MagicMock + + from openlayer.lib.tracing import enums + + mock_step = MagicMock() + mock_ctx = MagicMock() + mock_ctx.__enter__.return_value = mock_step + mock_ctx.__exit__.return_value = None + + with patch.object(tracer, "create_step", return_value=mock_ctx) as mock_create_step: + tracer.add_embedding_step_to_trace( + name="OpenAI Embedding", + inputs={"input": "hello"}, + output=[0.1, 0.2, 0.3], + model="text-embedding-3-small", + embedding_dimensions=3, + embedding_count=1, + ) + + mock_create_step.assert_called_once_with( + step_type=enums.StepType.EMBEDDING, + name="OpenAI Embedding", + ) + mock_step.log.assert_called_once_with( + name="OpenAI Embedding", + inputs={"input": "hello"}, + output=[0.1, 0.2, 0.3], + model="text-embedding-3-small", + embedding_dimensions=3, + embedding_count=1, + ) + + def test_add_embedding_step_to_trace_uses_default_name(self) -> None: + """When no name is given, the helper must default to 'Embedding'.""" + from unittest.mock import MagicMock + + from openlayer.lib.tracing import enums + + mock_step = MagicMock() + mock_ctx = MagicMock() + mock_ctx.__enter__.return_value = mock_step + mock_ctx.__exit__.return_value = None + + with patch.object(tracer, "create_step", return_value=mock_ctx) as mock_create_step: + tracer.add_embedding_step_to_trace(model="x", output=[0.0]) + + mock_create_step.assert_called_once_with( + step_type=enums.StepType.EMBEDDING, + name="Embedding", + ) From 5bdf9db2319dd158e362fb117f340215f2feae51 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:05:24 -0300 Subject: [PATCH 04/17] test(bedrock): backfill regression for Anthropic chat invoke path (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_bedrock_integration.py | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/test_bedrock_integration.py diff --git a/tests/test_bedrock_integration.py b/tests/test_bedrock_integration.py new file mode 100644 index 00000000..8d7e4947 --- /dev/null +++ b/tests/test_bedrock_integration.py @@ -0,0 +1,60 @@ +"""Test AWS Bedrock integration.""" + +import io +import json +from unittest.mock import MagicMock, patch + + +class TestBedrockChatRegression: + """Lock in existing chat-completion behaviour before refactoring.""" + + def _make_anthropic_response(self, body_dict): + """Build a response dict mimicking what bedrock-runtime returns.""" + from botocore.response import StreamingBody + + body_bytes = json.dumps(body_dict).encode("utf-8") + return {"body": StreamingBody(io.BytesIO(body_bytes), len(body_bytes))} + + def test_anthropic_chat_invoke_routes_through_existing_handler(self) -> None: + """invoke_model with a Claude model must hit the existing chat handler.""" + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + mock_client = MagicMock() + mock_client.invoke_model.return_value = self._make_anthropic_response( + { + "id": "msg_01", + "type": "message", + "role": "assistant", + "content": [{"type": "text", "text": "hello back"}], + "stop_reason": "end_turn", + "usage": {"input_tokens": 10, "output_tokens": 5}, + } + ) + + traced = trace_bedrock(mock_client) + + with patch("openlayer.lib.integrations.bedrock_tracer.add_to_trace") as mock_add: + response = traced.invoke_model( + modelId="anthropic.claude-3-haiku-20240307-v1:0", + body=json.dumps( + { + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": 100, + "messages": [{"role": "user", "content": "hi"}], + } + ), + contentType="application/json", + accept="application/json", + ) + + # The chat path called add_to_trace (which wraps add_chat_completion_step_to_trace). + mock_add.assert_called_once() + kwargs = mock_add.call_args.kwargs + assert kwargs["model"] == "anthropic.claude-3-haiku-20240307-v1:0" + assert kwargs["output"] == "hello back" + assert kwargs["prompt_tokens"] == 10 + assert kwargs["completion_tokens"] == 5 + + # Caller can still consume the response body — critical regression guard. + replayed = response["body"].read() + assert b"hello back" in replayed From 9d1b09a396d44d509d30cd52da174223241f355e Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:06:58 -0300 Subject: [PATCH 05/17] feat(bedrock): route embedding modelIds to dedicated handler (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../lib/integrations/bedrock_tracer.py | 23 ++++++++ tests/test_bedrock_integration.py | 56 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/src/openlayer/lib/integrations/bedrock_tracer.py b/src/openlayer/lib/integrations/bedrock_tracer.py index a497474e..76fed268 100644 --- a/src/openlayer/lib/integrations/bedrock_tracer.py +++ b/src/openlayer/lib/integrations/bedrock_tracer.py @@ -25,6 +25,11 @@ logger = logging.getLogger(__name__) +def _is_embedding_model(model_id: str) -> bool: + """Return True when modelId refers to a Bedrock embedding model.""" + return "embed" in (model_id or "").lower() + + def trace_bedrock(client: "boto3.client") -> "boto3.client": """Patch the Bedrock client to trace model invocations. @@ -63,6 +68,14 @@ def trace_bedrock(client: "boto3.client") -> "boto3.client": @wraps(invoke_model_func) def traced_invoke_model(*args, **kwargs): inference_id = kwargs.pop("inference_id", None) + model_id = kwargs.get("modelId", "") + if _is_embedding_model(model_id): + return handle_embedding_invoke( + *args, + **kwargs, + invoke_func=invoke_model_func, + inference_id=inference_id, + ) return handle_non_streaming_invoke( *args, **kwargs, @@ -153,6 +166,16 @@ def handle_non_streaming_invoke( return response +def handle_embedding_invoke( + invoke_func: callable, + *args, + inference_id: Optional[str] = None, + **kwargs, +) -> Dict[str, Any]: + """Stub — full implementation in next task.""" + return invoke_func(*args, **kwargs) + + def handle_streaming_invoke( invoke_func: callable, *args, diff --git a/tests/test_bedrock_integration.py b/tests/test_bedrock_integration.py index 8d7e4947..88362d45 100644 --- a/tests/test_bedrock_integration.py +++ b/tests/test_bedrock_integration.py @@ -58,3 +58,59 @@ def test_anthropic_chat_invoke_routes_through_existing_handler(self) -> None: # Caller can still consume the response body — critical regression guard. replayed = response["body"].read() assert b"hello back" in replayed + + +class TestBedrockEmbeddingDetection: + """Detection routes embedding models away from the chat handler.""" + + def test_is_embedding_model_titan(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import _is_embedding_model + + assert _is_embedding_model("amazon.titan-embed-text-v1") is True + assert _is_embedding_model("amazon.titan-embed-text-v2:0") is True + + def test_is_embedding_model_cohere(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import _is_embedding_model + + assert _is_embedding_model("cohere.embed-english-v3") is True + assert _is_embedding_model("cohere.embed-multilingual-v3") is True + + def test_is_embedding_model_chat_returns_false(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import _is_embedding_model + + assert _is_embedding_model("anthropic.claude-3-haiku-20240307-v1:0") is False + assert _is_embedding_model("meta.llama3-70b-instruct-v1:0") is False + + def test_is_embedding_model_handles_empty_string(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import _is_embedding_model + + assert _is_embedding_model("") is False + + def test_traced_invoke_routes_embedding_to_new_handler(self) -> None: + """An embedding modelId must call handle_embedding_invoke, not the chat handler.""" + from botocore.response import StreamingBody + + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + body = json.dumps( + {"embedding": [0.1, 0.2], "inputTextTokenCount": 4} + ).encode("utf-8") + mock_client = MagicMock() + mock_client.invoke_model.return_value = { + "body": StreamingBody(io.BytesIO(body), len(body)) + } + traced = trace_bedrock(mock_client) + + with patch( + "openlayer.lib.integrations.bedrock_tracer.handle_embedding_invoke" + ) as mock_embed, patch( + "openlayer.lib.integrations.bedrock_tracer.handle_non_streaming_invoke" + ) as mock_chat: + mock_embed.return_value = {"body": "ok"} + traced.invoke_model( + modelId="amazon.titan-embed-text-v2:0", + body=json.dumps({"inputText": "hi"}), + ) + + mock_embed.assert_called_once() + mock_chat.assert_not_called() From 84765e381aa32e8f038ddc2e67e0e72f3f19d485 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:08:10 -0300 Subject: [PATCH 06/17] feat(bedrock): parse Titan v1/v2 embedding requests (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../lib/integrations/bedrock_tracer.py | 100 +++++++++++++++++- tests/test_bedrock_integration.py | 78 ++++++++++++++ 2 files changed, 176 insertions(+), 2 deletions(-) diff --git a/src/openlayer/lib/integrations/bedrock_tracer.py b/src/openlayer/lib/integrations/bedrock_tracer.py index 76fed268..6fb4cd06 100644 --- a/src/openlayer/lib/integrations/bedrock_tracer.py +++ b/src/openlayer/lib/integrations/bedrock_tracer.py @@ -172,8 +172,104 @@ def handle_embedding_invoke( inference_id: Optional[str] = None, **kwargs, ) -> Dict[str, Any]: - """Stub — full implementation in next task.""" - return invoke_func(*args, **kwargs) + """Handle invoke_model for embedding models (Titan, Cohere).""" + start_time = time.time() + response = invoke_func(*args, **kwargs) + end_time = time.time() + + try: + # Parse the request body + body_str = kwargs.get("body", "{}") + if isinstance(body_str, bytes): + body_str = body_str.decode("utf-8") + body_data = json.loads(body_str) if isinstance(body_str, str) else body_str + + # Read and replace the response body so callers can still consume it + original_body = response["body"] + response_body_bytes = original_body.read() + response_data = json.loads( + response_body_bytes.decode("utf-8") + if isinstance(response_body_bytes, bytes) + else response_body_bytes + ) + new_stream = io.BytesIO(response_body_bytes) + response["body"] = StreamingBody(new_stream, len(response_body_bytes)) + + model_id = kwargs.get("modelId", "") + inputs = _parse_embedding_input(body_data, model_id) + embeddings, dim, count = _parse_embedding_output(response_data, model_id) + prompt_tokens = _parse_embedding_tokens(response_data, model_id) + model_parameters = _get_embedding_model_parameters(body_data, model_id) + + tracer.add_embedding_step_to_trace( + name="AWS Bedrock Embedding", + end_time=end_time, + inputs=inputs, + output=embeddings, + latency=(end_time - start_time) * 1000, + tokens=prompt_tokens, + prompt_tokens=prompt_tokens, + model=model_id, + model_parameters=model_parameters, + embedding_dimensions=dim, + embedding_count=count, + raw_output=response_data, + provider="Bedrock", + id=inference_id, + metadata={}, + ) + + except Exception as e: + logger.error( + "Failed to trace the Bedrock embedding invocation with Openlayer. %s", e + ) + + return response + + +def _parse_embedding_input(body_data: Dict[str, Any], model_id: str) -> Dict[str, Any]: + if model_id.startswith("amazon.titan-embed"): + return {"input": body_data.get("inputText", "")} + if model_id.startswith("cohere.embed"): + return {"input": body_data.get("texts", [])} + return {"input": body_data} + + +def _parse_embedding_output( + response_data: Dict[str, Any], model_id: str +) -> tuple: + """Returns (embeddings, dimensions, count).""" + if model_id.startswith("amazon.titan-embed"): + emb = response_data.get("embedding", []) + return emb, len(emb), 1 + if model_id.startswith("cohere.embed"): + embs = response_data.get("embeddings", []) + dim = len(embs[0]) if embs else 0 + return embs, dim, len(embs) + return [], 0, 0 + + +def _parse_embedding_tokens(response_data: Dict[str, Any], model_id: str) -> int: + if model_id.startswith("amazon.titan-embed"): + return response_data.get("inputTextTokenCount", 0) + return 0 + + +def _get_embedding_model_parameters( + body_data: Dict[str, Any], model_id: str +) -> Dict[str, Any]: + if model_id.startswith("amazon.titan-embed"): + return { + "dimensions": body_data.get("dimensions"), + "normalize": body_data.get("normalize"), + } + if model_id.startswith("cohere.embed"): + return { + "input_type": body_data.get("input_type"), + "truncate": body_data.get("truncate"), + "embedding_types": body_data.get("embedding_types"), + } + return {} def handle_streaming_invoke( diff --git a/tests/test_bedrock_integration.py b/tests/test_bedrock_integration.py index 88362d45..0bc2e620 100644 --- a/tests/test_bedrock_integration.py +++ b/tests/test_bedrock_integration.py @@ -114,3 +114,81 @@ def test_traced_invoke_routes_embedding_to_new_handler(self) -> None: mock_embed.assert_called_once() mock_chat.assert_not_called() + + +class TestBedrockTitanEmbedding: + """Titan v1 and v2 embedding requests produce well-formed embedding steps.""" + + def _titan_response(self, embedding, token_count): + from botocore.response import StreamingBody + + body = json.dumps( + {"embedding": embedding, "inputTextTokenCount": token_count} + ).encode("utf-8") + return {"body": StreamingBody(io.BytesIO(body), len(body))} + + def test_titan_v2_single_embedding(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + vec = [0.1, 0.2, 0.3, 0.4] + mock_client = MagicMock() + mock_client.invoke_model.return_value = self._titan_response(vec, 7) + traced = trace_bedrock(mock_client) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + response = traced.invoke_model( + modelId="amazon.titan-embed-text-v2:0", + body=json.dumps( + {"inputText": "hello world", "dimensions": 4, "normalize": True} + ), + contentType="application/json", + accept="application/json", + ) + + mock_add.assert_called_once() + kwargs = mock_add.call_args.kwargs + assert kwargs["name"] == "AWS Bedrock Embedding" + assert kwargs["model"] == "amazon.titan-embed-text-v2:0" + assert kwargs["provider"] == "Bedrock" + assert kwargs["inputs"] == {"input": "hello world"} + assert kwargs["output"] == vec + assert kwargs["embedding_dimensions"] == 4 + assert kwargs["embedding_count"] == 1 + assert kwargs["prompt_tokens"] == 7 + assert kwargs["tokens"] == 7 + assert kwargs["model_parameters"] == { + "dimensions": 4, + "normalize": True, + } + assert response["body"].read() == json.dumps( + {"embedding": vec, "inputTextTokenCount": 7} + ).encode("utf-8") + + def test_titan_v1_single_embedding(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + vec = [0.5] * 1536 + mock_client = MagicMock() + mock_client.invoke_model.return_value = self._titan_response(vec, 12) + traced = trace_bedrock(mock_client) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + traced.invoke_model( + modelId="amazon.titan-embed-text-v1", + body=json.dumps({"inputText": "another"}), + ) + + kwargs = mock_add.call_args.kwargs + assert kwargs["model"] == "amazon.titan-embed-text-v1" + assert kwargs["embedding_dimensions"] == 1536 + assert kwargs["embedding_count"] == 1 + assert kwargs["prompt_tokens"] == 12 + # v1 has no `dimensions`/`normalize` params in its request body + assert kwargs["model_parameters"] == { + "dimensions": None, + "normalize": None, + } From ba3d831ac2ce03dc7cd676c98e20f5d4e2bb842b Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:08:46 -0300 Subject: [PATCH 07/17] test(bedrock): cover Cohere embedding batch path (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_bedrock_integration.py | 51 +++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/test_bedrock_integration.py b/tests/test_bedrock_integration.py index 0bc2e620..0543b096 100644 --- a/tests/test_bedrock_integration.py +++ b/tests/test_bedrock_integration.py @@ -192,3 +192,54 @@ def test_titan_v1_single_embedding(self) -> None: "dimensions": None, "normalize": None, } + + +class TestBedrockCohereEmbedding: + """Cohere v3 embedding produces a multi-vector batch step.""" + + def _cohere_response(self, embeddings): + from botocore.response import StreamingBody + + body = json.dumps( + { + "embeddings": embeddings, + "id": "abc-123", + "response_type": "embeddings_floats", + } + ).encode("utf-8") + return {"body": StreamingBody(io.BytesIO(body), len(body))} + + def test_cohere_embed_batch(self) -> None: + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]] + mock_client = MagicMock() + mock_client.invoke_model.return_value = self._cohere_response(embeddings) + traced = trace_bedrock(mock_client) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + traced.invoke_model( + modelId="cohere.embed-english-v3", + body=json.dumps( + { + "texts": ["one", "two", "three"], + "input_type": "search_document", + } + ), + ) + + kwargs = mock_add.call_args.kwargs + assert kwargs["model"] == "cohere.embed-english-v3" + assert kwargs["inputs"] == {"input": ["one", "two", "three"]} + assert kwargs["output"] == embeddings + assert kwargs["embedding_dimensions"] == 3 + assert kwargs["embedding_count"] == 3 + # Cohere v3 does not return tokens in its response body. + assert kwargs["prompt_tokens"] == 0 + assert kwargs["model_parameters"] == { + "input_type": "search_document", + "truncate": None, + "embedding_types": None, + } From c605178e345aad874c56a69fcedca2e93bee279a Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:09:38 -0300 Subject: [PATCH 08/17] test(bedrock): cover embedding failure isolation and body replay (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_bedrock_integration.py | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/test_bedrock_integration.py b/tests/test_bedrock_integration.py index 0543b096..5c6ca3bb 100644 --- a/tests/test_bedrock_integration.py +++ b/tests/test_bedrock_integration.py @@ -243,3 +243,56 @@ def test_cohere_embed_batch(self) -> None: "truncate": None, "embedding_types": None, } + + +class TestBedrockEmbeddingResilience: + """Tracing failures must never break the caller; response body must remain usable.""" + + def test_embedding_failure_does_not_break_client(self) -> None: + from botocore.response import StreamingBody + + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + body_bytes = json.dumps( + {"embedding": [0.1, 0.2], "inputTextTokenCount": 4} + ).encode("utf-8") + mock_client = MagicMock() + mock_client.invoke_model.return_value = { + "body": StreamingBody(io.BytesIO(body_bytes), len(body_bytes)) + } + traced = trace_bedrock(mock_client) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace", + side_effect=RuntimeError("backend down"), + ): + response = traced.invoke_model( + modelId="amazon.titan-embed-text-v2:0", + body=json.dumps({"inputText": "hi"}), + ) + + # Caller still gets the real response. + assert response["body"].read() == body_bytes + + def test_embedding_response_body_is_replayable(self) -> None: + from botocore.response import StreamingBody + + from openlayer.lib.integrations.bedrock_tracer import trace_bedrock + + body_bytes = json.dumps( + {"embedding": [0.9, 0.8, 0.7], "inputTextTokenCount": 3} + ).encode("utf-8") + mock_client = MagicMock() + mock_client.invoke_model.return_value = { + "body": StreamingBody(io.BytesIO(body_bytes), len(body_bytes)) + } + traced = trace_bedrock(mock_client) + + with patch("openlayer.lib.tracing.tracer.add_embedding_step_to_trace"): + response = traced.invoke_model( + modelId="amazon.titan-embed-text-v2:0", + body=json.dumps({"inputText": "x"}), + ) + + # The body must be readable by the caller after tracing has consumed it. + assert response["body"].read() == body_bytes From 64b46a68f712b57bae3e498913db6cccbd495ac6 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:13:11 -0300 Subject: [PATCH 09/17] feat(litellm): patch litellm.embedding and trace single-input calls (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../lib/integrations/litellm_tracer.py | 106 +++++++++++++++++- tests/test_litellm_integration.py | 62 +++++++++- 2 files changed, 166 insertions(+), 2 deletions(-) diff --git a/src/openlayer/lib/integrations/litellm_tracer.py b/src/openlayer/lib/integrations/litellm_tracer.py index d4545e90..e438adef 100644 --- a/src/openlayer/lib/integrations/litellm_tracer.py +++ b/src/openlayer/lib/integrations/litellm_tracer.py @@ -96,8 +96,24 @@ def traced_completion(*args, **kwargs): ) litellm.completion = traced_completion + + # Patch litellm.embedding to trace embedding calls. + original_embedding = litellm.embedding + + @wraps(original_embedding) + def traced_embedding(*args, **kwargs): + inference_id = kwargs.pop("inference_id", None) + return handle_embedding( + *args, + **kwargs, + embedding_func=original_embedding, + inference_id=inference_id, + ) + + litellm.embedding = traced_embedding + _litellm_traced = True - logger.debug("litellm.completion has been patched for Openlayer tracing") + logger.debug("litellm.completion and litellm.embedding have been patched for Openlayer tracing") def handle_streaming_completion( @@ -337,6 +353,94 @@ def handle_non_streaming_completion( return response +def handle_embedding( + embedding_func: callable, + *args, + inference_id: Optional[str] = None, + **kwargs, +) -> Any: + """Handle a single litellm.embedding() invocation.""" + start_time = time.time() + response = embedding_func(*args, **kwargs) + end_time = time.time() + + try: + model_name = kwargs.get("model", getattr(response, "model", "unknown")) + provider = detect_provider_from_response(response, model_name) + embeddings, dim, count = _parse_embedding_response(response) + usage_data = extract_usage_from_response(response) + extra_metadata = extract_litellm_metadata(response, model_name) + cost = extra_metadata.get("cost", None) + + prompt_tokens = usage_data.get("prompt_tokens") or 0 + total_tokens = usage_data.get("total_tokens") or prompt_tokens + + tracer.add_embedding_step_to_trace( + name="LiteLLM Embedding", + end_time=end_time, + inputs={"input": kwargs.get("input")}, + output=embeddings, + latency=(end_time - start_time) * 1000, + tokens=total_tokens, + prompt_tokens=prompt_tokens, + model=model_name, + model_parameters=_get_embedding_model_parameters(kwargs), + embedding_dimensions=dim, + embedding_count=count, + raw_output=( + response.model_dump() + if hasattr(response, "model_dump") + else str(response) + ), + provider=provider, + cost=cost, + id=inference_id, + metadata={ + "provider": provider, + "litellm_model": model_name, + **extra_metadata, + }, + ) + except Exception as e: + logger.error( + "Failed to trace the LiteLLM embedding request with Openlayer. %s", e + ) + + return response + + +def _parse_embedding_response(response: Any) -> tuple: + """Returns (embeddings, dimensions, count). Mirrors OpenAI EmbeddingResponse.""" + try: + data = getattr(response, "data", None) + if data is None and isinstance(response, dict): + data = response.get("data", []) + if not data: + return [], 0, 0 + embeddings = [ + item["embedding"] if isinstance(item, dict) else item.embedding + for item in data + ] + if not embeddings: + return [], 0, 0 + if len(embeddings) == 1: + return embeddings[0], len(embeddings[0]), 1 + return embeddings, len(embeddings[0]), len(embeddings) + except Exception: + return [], 0, 0 + + +def _get_embedding_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: + return { + "dimensions": kwargs.get("dimensions"), + "encoding_format": kwargs.get("encoding_format"), + "user": kwargs.get("user"), + "timeout": kwargs.get("timeout"), + "api_base": kwargs.get("api_base"), + "api_version": kwargs.get("api_version"), + } + + def get_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: """Gets the model parameters from the kwargs.""" return { diff --git a/tests/test_litellm_integration.py b/tests/test_litellm_integration.py index ecebbff8..0fccecb0 100644 --- a/tests/test_litellm_integration.py +++ b/tests/test_litellm_integration.py @@ -272,6 +272,66 @@ def test_detect_provider_with_litellm_method(self, mock_litellm: Mock) -> None: mock_response = Mock(spec=[]) # No special attributes provider = detect_provider_from_response(mock_response, 'gpt-4') - + assert provider == 'openai' mock_litellm.get_llm_provider.assert_called_once_with('gpt-4') + + +class TestLiteLLMEmbedding: + """Embedding calls must be traced via add_embedding_step_to_trace.""" + + @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) + @patch("openlayer.lib.integrations.litellm_tracer.litellm") + def test_trace_litellm_patches_embedding(self, mock_litellm: Mock) -> None: + from openlayer.lib.integrations import litellm_tracer + + litellm_tracer._litellm_traced = False + + original_embedding = Mock() + mock_litellm.embedding = original_embedding + mock_litellm.completion = Mock() + + from openlayer.lib.integrations.litellm_tracer import trace_litellm + + trace_litellm() + + assert mock_litellm.embedding != original_embedding + assert callable(mock_litellm.embedding) + + @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) + def test_handle_embedding_single_input(self) -> None: + from openlayer.lib.integrations.litellm_tracer import handle_embedding + + fake_response = Mock() + fake_response.model = "text-embedding-3-small" + fake_response.data = [Mock(embedding=[0.1, 0.2, 0.3])] + fake_response.usage = Mock(prompt_tokens=4, total_tokens=4) + fake_response.model_dump = Mock( + return_value={"model": "text-embedding-3-small"} + ) + fake_response._hidden_params = {} + + embedding_func = Mock(return_value=fake_response) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + result = handle_embedding( + embedding_func=embedding_func, + model="text-embedding-3-small", + input="hello", + inference_id="custom-id", + ) + + assert result is fake_response + mock_add.assert_called_once() + kwargs = mock_add.call_args.kwargs + assert kwargs["name"] == "LiteLLM Embedding" + assert kwargs["model"] == "text-embedding-3-small" + assert kwargs["inputs"] == {"input": "hello"} + assert kwargs["output"] == [0.1, 0.2, 0.3] + assert kwargs["embedding_dimensions"] == 3 + assert kwargs["embedding_count"] == 1 + assert kwargs["prompt_tokens"] == 4 + assert kwargs["tokens"] == 4 + assert kwargs["id"] == "custom-id" From 2a60ce5542245f9fd09cc7419ab0d0a3b3052e77 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:13:45 -0300 Subject: [PATCH 10/17] test(litellm): cover batch embedding parsing (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_litellm_integration.py | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_litellm_integration.py b/tests/test_litellm_integration.py index 0fccecb0..951d68a4 100644 --- a/tests/test_litellm_integration.py +++ b/tests/test_litellm_integration.py @@ -335,3 +335,41 @@ def test_handle_embedding_single_input(self) -> None: assert kwargs["prompt_tokens"] == 4 assert kwargs["tokens"] == 4 assert kwargs["id"] == "custom-id" + + @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) + def test_handle_embedding_batch_input(self) -> None: + from openlayer.lib.integrations.litellm_tracer import handle_embedding + + fake_response = Mock() + fake_response.model = "text-embedding-3-small" + fake_response.data = [ + Mock(embedding=[0.1, 0.2, 0.3]), + Mock(embedding=[0.4, 0.5, 0.6]), + Mock(embedding=[0.7, 0.8, 0.9]), + ] + fake_response.usage = Mock(prompt_tokens=12, total_tokens=12) + fake_response.model_dump = Mock(return_value={}) + fake_response._hidden_params = {} + + embedding_func = Mock(return_value=fake_response) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + handle_embedding( + embedding_func=embedding_func, + model="text-embedding-3-small", + input=["a", "b", "c"], + ) + + kwargs = mock_add.call_args.kwargs + assert kwargs["inputs"] == {"input": ["a", "b", "c"]} + assert kwargs["output"] == [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + [0.7, 0.8, 0.9], + ] + assert kwargs["embedding_dimensions"] == 3 + assert kwargs["embedding_count"] == 3 + assert kwargs["prompt_tokens"] == 12 + assert kwargs["tokens"] == 12 From 018fe5dc67bc09958c16274ad910d95317510f0e Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:14:30 -0300 Subject: [PATCH 11/17] test(litellm): cover embedding cost, failure isolation, completion regression (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_litellm_integration.py | 66 +++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/test_litellm_integration.py b/tests/test_litellm_integration.py index 951d68a4..b7026255 100644 --- a/tests/test_litellm_integration.py +++ b/tests/test_litellm_integration.py @@ -373,3 +373,69 @@ def test_handle_embedding_batch_input(self) -> None: assert kwargs["embedding_count"] == 3 assert kwargs["prompt_tokens"] == 12 assert kwargs["tokens"] == 12 + + @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) + def test_handle_embedding_cost_from_hidden_params(self) -> None: + from openlayer.lib.integrations.litellm_tracer import handle_embedding + + fake_response = Mock() + fake_response.model = "text-embedding-3-small" + fake_response.data = [Mock(embedding=[0.1, 0.2])] + fake_response.usage = Mock(prompt_tokens=2, total_tokens=2) + fake_response.model_dump = Mock(return_value={}) + fake_response._hidden_params = {"response_cost": 0.0000123} + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + handle_embedding( + embedding_func=Mock(return_value=fake_response), + model="text-embedding-3-small", + input="x", + ) + + assert mock_add.call_args.kwargs["cost"] == 0.0000123 + + @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) + def test_handle_embedding_failure_does_not_break_client(self) -> None: + from openlayer.lib.integrations.litellm_tracer import handle_embedding + + fake_response = Mock() + fake_response.model = "text-embedding-3-small" + fake_response.data = [Mock(embedding=[0.1])] + fake_response.usage = Mock(prompt_tokens=1, total_tokens=1) + fake_response.model_dump = Mock(return_value={}) + fake_response._hidden_params = {} + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace", + side_effect=RuntimeError("backend down"), + ): + result = handle_embedding( + embedding_func=Mock(return_value=fake_response), + model="text-embedding-3-small", + input="x", + ) + + assert result is fake_response + + @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) + @patch("openlayer.lib.integrations.litellm_tracer.litellm") + def test_completion_path_unchanged_after_embedding_patch( + self, mock_litellm: Mock + ) -> None: + """Patching embedding must not affect completion behaviour.""" + from openlayer.lib.integrations import litellm_tracer + + litellm_tracer._litellm_traced = False + + original_completion = Mock() + mock_litellm.completion = original_completion + mock_litellm.embedding = Mock() + + from openlayer.lib.integrations.litellm_tracer import trace_litellm + + trace_litellm() + + assert mock_litellm.completion != original_completion + assert callable(mock_litellm.completion) From 3e5ce68614097e3f85fe09f6ac198b731c346093 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:15:26 -0300 Subject: [PATCH 12/17] feat(openai): add shared embedding parsing helpers (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../integrations/_openai_embedding_common.py | 37 ++++++++ tests/test_openai_embedding_helpers.py | 94 +++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 src/openlayer/lib/integrations/_openai_embedding_common.py create mode 100644 tests/test_openai_embedding_helpers.py diff --git a/src/openlayer/lib/integrations/_openai_embedding_common.py b/src/openlayer/lib/integrations/_openai_embedding_common.py new file mode 100644 index 00000000..9660611c --- /dev/null +++ b/src/openlayer/lib/integrations/_openai_embedding_common.py @@ -0,0 +1,37 @@ +"""Shared parsing helpers for OpenAI sync + async embedding tracers.""" + +from typing import Any, Dict, List, Tuple, Union + + +def parse_embedding_response( + response: Any, +) -> Tuple[Union[List[float], List[List[float]]], int, int]: + """Extract (embeddings, dimensions, count) from an OpenAI EmbeddingResponse. + + For a single input, returns the vector directly. + For a batch, returns a list of vectors. + """ + try: + data = getattr(response, "data", None) + if data is None: + return [], 0, 0 + embeddings = [ + item["embedding"] if isinstance(item, dict) else item.embedding + for item in data + ] + if not embeddings: + return [], 0, 0 + if len(embeddings) == 1: + return embeddings[0], len(embeddings[0]), 1 + return embeddings, len(embeddings[0]), len(embeddings) + except Exception: + return [], 0, 0 + + +def get_embedding_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: + """Extract embedding-relevant model parameters from create() kwargs.""" + return { + "dimensions": kwargs.get("dimensions"), + "encoding_format": kwargs.get("encoding_format"), + "user": kwargs.get("user"), + } diff --git a/tests/test_openai_embedding_helpers.py b/tests/test_openai_embedding_helpers.py new file mode 100644 index 00000000..5f6bb2d2 --- /dev/null +++ b/tests/test_openai_embedding_helpers.py @@ -0,0 +1,94 @@ +"""Test shared OpenAI embedding parsers.""" + +from unittest.mock import Mock + + +class TestParseEmbeddingResponse: + def test_single_vector(self) -> None: + from openlayer.lib.integrations._openai_embedding_common import ( + parse_embedding_response, + ) + + response = Mock() + response.data = [Mock(embedding=[0.1, 0.2, 0.3, 0.4])] + + embeddings, dim, count = parse_embedding_response(response) + assert embeddings == [0.1, 0.2, 0.3, 0.4] + assert dim == 4 + assert count == 1 + + def test_batch_vectors(self) -> None: + from openlayer.lib.integrations._openai_embedding_common import ( + parse_embedding_response, + ) + + response = Mock() + response.data = [ + Mock(embedding=[0.1, 0.2]), + Mock(embedding=[0.3, 0.4]), + ] + + embeddings, dim, count = parse_embedding_response(response) + assert embeddings == [[0.1, 0.2], [0.3, 0.4]] + assert dim == 2 + assert count == 2 + + def test_empty_data(self) -> None: + from openlayer.lib.integrations._openai_embedding_common import ( + parse_embedding_response, + ) + + response = Mock() + response.data = [] + + embeddings, dim, count = parse_embedding_response(response) + assert embeddings == [] + assert dim == 0 + assert count == 0 + + def test_dict_data_items(self) -> None: + """Some response shapes carry dict items instead of model objects.""" + from openlayer.lib.integrations._openai_embedding_common import ( + parse_embedding_response, + ) + + response = Mock() + response.data = [{"embedding": [0.5, 0.6]}] + + embeddings, dim, count = parse_embedding_response(response) + assert embeddings == [0.5, 0.6] + assert dim == 2 + assert count == 1 + + +class TestGetEmbeddingModelParameters: + def test_extracts_relevant_params(self) -> None: + from openlayer.lib.integrations._openai_embedding_common import ( + get_embedding_model_parameters, + ) + + params = get_embedding_model_parameters( + { + "dimensions": 1536, + "encoding_format": "float", + "user": "u1", + "irrelevant": "ignored", + } + ) + assert params == { + "dimensions": 1536, + "encoding_format": "float", + "user": "u1", + } + + def test_missing_params_default_to_none(self) -> None: + from openlayer.lib.integrations._openai_embedding_common import ( + get_embedding_model_parameters, + ) + + params = get_embedding_model_parameters({}) + assert params == { + "dimensions": None, + "encoding_format": None, + "user": None, + } From fc456c63fdc6ad47645bea16c85cbec1c076e363 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:17:12 -0300 Subject: [PATCH 13/17] feat(openai): trace sync embeddings.create (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../lib/integrations/openai_tracer.py | 73 +++++++++++++ tests/test_openai_embedding_integration.py | 100 ++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 tests/test_openai_embedding_integration.py diff --git a/src/openlayer/lib/integrations/openai_tracer.py b/src/openlayer/lib/integrations/openai_tracer.py index 37e6fe58..64bf606b 100644 --- a/src/openlayer/lib/integrations/openai_tracer.py +++ b/src/openlayer/lib/integrations/openai_tracer.py @@ -153,6 +153,22 @@ def traced_responses_create_func(*args, **kwargs): else: logger.debug("Responses API not available in this OpenAI client version") + # Patch Embeddings API + if hasattr(client, "embeddings"): + embeddings_create_func = client.embeddings.create + + @wraps(embeddings_create_func) + def traced_embeddings_create_func(*args, **kwargs): + inference_id = kwargs.pop("inference_id", None) + return handle_embedding( + *args, + **kwargs, + original_func=embeddings_create_func, + inference_id=inference_id, + ) + + client.embeddings.create = traced_embeddings_create_func + return client @@ -1611,6 +1627,63 @@ def parse_structured_output_data(response: Any) -> Union[str, Dict[str, Any], No return parse_non_streaming_output_data(response) +# ----------------------------- OpenAI Embeddings ---------------------------- # +def handle_embedding( + original_func: callable, + *args, + inference_id: Optional[str] = None, + **kwargs, +) -> Any: + """Trace a sync OpenAI client.embeddings.create() call.""" + from ._openai_embedding_common import ( + get_embedding_model_parameters as _get_embedding_model_parameters, + ) + from ._openai_embedding_common import ( + parse_embedding_response as _parse_embedding_response, + ) + + start_time = time.time() + response = original_func(*args, **kwargs) + end_time = time.time() + + try: + model_name = getattr(response, "model", kwargs.get("model", "unknown")) + embeddings, dim, count = _parse_embedding_response(response) + usage = getattr(response, "usage", None) + prompt_tokens = getattr(usage, "prompt_tokens", 0) if usage else 0 + total_tokens = ( + getattr(usage, "total_tokens", prompt_tokens) if usage else prompt_tokens + ) + + tracer.add_embedding_step_to_trace( + name="OpenAI Embedding", + end_time=end_time, + inputs={"input": kwargs.get("input")}, + output=embeddings, + latency=(end_time - start_time) * 1000, + tokens=total_tokens, + prompt_tokens=prompt_tokens, + model=model_name, + model_parameters=_get_embedding_model_parameters(kwargs), + embedding_dimensions=dim, + embedding_count=count, + raw_output=( + response.model_dump() + if hasattr(response, "model_dump") + else str(response) + ), + provider="OpenAI", + id=inference_id, + metadata={"provider": "OpenAI"}, + ) + except Exception as e: + logger.error( + "Failed to trace the OpenAI embedding request with Openlayer. %s", e + ) + + return response + + # --------------------------- OpenAI Assistants API -------------------------- # def trace_openai_assistant_thread_run( client: "openai.OpenAI", run: "openai.types.beta.threads.run.Run" diff --git a/tests/test_openai_embedding_integration.py b/tests/test_openai_embedding_integration.py new file mode 100644 index 00000000..c57b1ee5 --- /dev/null +++ b/tests/test_openai_embedding_integration.py @@ -0,0 +1,100 @@ +"""Test OpenAI embedding integration (sync).""" + +from unittest.mock import MagicMock, Mock, patch + + +class TestOpenAISyncEmbedding: + """Sync OpenAI client.embeddings.create must be traced.""" + + def _fake_response(self, embeddings, prompt_tokens=4, model="text-embedding-3-small"): + response = Mock() + response.model = model + response.data = [Mock(embedding=v) for v in embeddings] + response.usage = Mock(prompt_tokens=prompt_tokens, total_tokens=prompt_tokens) + response.model_dump = Mock(return_value={"model": model}) + return response + + def test_handle_embedding_single_input(self) -> None: + from openlayer.lib.integrations.openai_tracer import handle_embedding + + fake = self._fake_response([[0.1, 0.2, 0.3]]) + original = Mock(return_value=fake) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + result = handle_embedding( + original_func=original, + model="text-embedding-3-small", + input="hello", + inference_id="abc", + ) + + assert result is fake + kwargs = mock_add.call_args.kwargs + assert kwargs["name"] == "OpenAI Embedding" + assert kwargs["provider"] == "OpenAI" + assert kwargs["model"] == "text-embedding-3-small" + assert kwargs["inputs"] == {"input": "hello"} + assert kwargs["output"] == [0.1, 0.2, 0.3] + assert kwargs["embedding_dimensions"] == 3 + assert kwargs["embedding_count"] == 1 + assert kwargs["prompt_tokens"] == 4 + assert kwargs["tokens"] == 4 + assert kwargs["id"] == "abc" + + def test_handle_embedding_batch_input(self) -> None: + from openlayer.lib.integrations.openai_tracer import handle_embedding + + fake = self._fake_response( + [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], prompt_tokens=9 + ) + original = Mock(return_value=fake) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + handle_embedding( + original_func=original, + model="text-embedding-3-small", + input=["one", "two", "three"], + ) + + kwargs = mock_add.call_args.kwargs + assert kwargs["inputs"] == {"input": ["one", "two", "three"]} + assert kwargs["output"] == [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]] + assert kwargs["embedding_dimensions"] == 2 + assert kwargs["embedding_count"] == 3 + assert kwargs["prompt_tokens"] == 9 + + def test_handle_embedding_failure_does_not_break_client(self) -> None: + from openlayer.lib.integrations.openai_tracer import handle_embedding + + fake = self._fake_response([[0.0]]) + original = Mock(return_value=fake) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace", + side_effect=RuntimeError("backend down"), + ): + result = handle_embedding( + original_func=original, + model="text-embedding-3-small", + input="x", + ) + + assert result is fake + + def test_trace_openai_patches_embeddings_create(self) -> None: + """After trace_openai, client.embeddings.create is replaced.""" + import openai + + from openlayer.lib.integrations.openai_tracer import trace_openai + + # Make client appear like a real OpenAI (not Azure) client. + client = MagicMock(spec=openai.OpenAI) + original_create = client.embeddings.create + + traced_client = trace_openai(client) + + assert traced_client.embeddings.create is not original_create From 242e0664a3cd0d1fa62a3cb1cf9c25b21212e05f Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:18:57 -0300 Subject: [PATCH 14/17] feat(openai): trace async embeddings.create (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../lib/integrations/async_openai_tracer.py | 74 +++++++++++++ ...test_async_openai_embedding_integration.py | 100 ++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 tests/test_async_openai_embedding_integration.py diff --git a/src/openlayer/lib/integrations/async_openai_tracer.py b/src/openlayer/lib/integrations/async_openai_tracer.py index 01a73ba8..d9803255 100644 --- a/src/openlayer/lib/integrations/async_openai_tracer.py +++ b/src/openlayer/lib/integrations/async_openai_tracer.py @@ -16,6 +16,7 @@ if TYPE_CHECKING: import openai +from ..tracing import tracer from .openai_tracer import ( get_model_parameters, create_trace_args, @@ -155,6 +156,22 @@ async def traced_responses_create_func(*args, **kwargs): else: logger.debug("Responses API not available in this AsyncOpenAI client version") + # Patch Embeddings API + if hasattr(client, "embeddings"): + embeddings_create_func = client.embeddings.create + + @wraps(embeddings_create_func) + async def traced_embeddings_create_func(*args, **kwargs): + inference_id = kwargs.pop("inference_id", None) + return await handle_embedding_async( + *args, + **kwargs, + original_func=embeddings_create_func, + inference_id=inference_id, + ) + + client.embeddings.create = traced_embeddings_create_func + return client @@ -698,3 +715,60 @@ async def handle_async_non_streaming_parse( ) return response + + +# ----------------------------- Async Embeddings ----------------------------- # +async def handle_embedding_async( + original_func: callable, + *args, + inference_id: Optional[str] = None, + **kwargs, +) -> Any: + """Trace an async AsyncOpenAI client.embeddings.create() call.""" + from ._openai_embedding_common import ( + get_embedding_model_parameters as _get_embedding_model_parameters, + ) + from ._openai_embedding_common import ( + parse_embedding_response as _parse_embedding_response, + ) + + start_time = time.time() + response = await original_func(*args, **kwargs) + end_time = time.time() + + try: + model_name = getattr(response, "model", kwargs.get("model", "unknown")) + embeddings, dim, count = _parse_embedding_response(response) + usage = getattr(response, "usage", None) + prompt_tokens = getattr(usage, "prompt_tokens", 0) if usage else 0 + total_tokens = ( + getattr(usage, "total_tokens", prompt_tokens) if usage else prompt_tokens + ) + + tracer.add_embedding_step_to_trace( + name="OpenAI Embedding", + end_time=end_time, + inputs={"input": kwargs.get("input")}, + output=embeddings, + latency=(end_time - start_time) * 1000, + tokens=total_tokens, + prompt_tokens=prompt_tokens, + model=model_name, + model_parameters=_get_embedding_model_parameters(kwargs), + embedding_dimensions=dim, + embedding_count=count, + raw_output=( + response.model_dump() + if hasattr(response, "model_dump") + else str(response) + ), + provider="OpenAI", + id=inference_id, + metadata={"provider": "OpenAI"}, + ) + except Exception as e: + logger.error( + "Failed to trace the OpenAI embedding request with Openlayer. %s", e + ) + + return response diff --git a/tests/test_async_openai_embedding_integration.py b/tests/test_async_openai_embedding_integration.py new file mode 100644 index 00000000..389794ff --- /dev/null +++ b/tests/test_async_openai_embedding_integration.py @@ -0,0 +1,100 @@ +"""Test OpenAI embedding integration (async).""" + +import asyncio +from unittest.mock import AsyncMock, MagicMock, Mock, patch + + +class TestOpenAIAsyncEmbedding: + def _fake_response(self, embeddings, prompt_tokens=4, model="text-embedding-3-small"): + response = Mock() + response.model = model + response.data = [Mock(embedding=v) for v in embeddings] + response.usage = Mock(prompt_tokens=prompt_tokens, total_tokens=prompt_tokens) + response.model_dump = Mock(return_value={"model": model}) + return response + + def test_handle_embedding_async_single_input(self) -> None: + from openlayer.lib.integrations.async_openai_tracer import ( + handle_embedding_async, + ) + + fake = self._fake_response([[0.1, 0.2, 0.3]]) + original = AsyncMock(return_value=fake) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + result = asyncio.run( + handle_embedding_async( + original_func=original, + model="text-embedding-3-small", + input="hello", + ) + ) + + assert result is fake + kwargs = mock_add.call_args.kwargs + assert kwargs["name"] == "OpenAI Embedding" + assert kwargs["provider"] == "OpenAI" + assert kwargs["output"] == [0.1, 0.2, 0.3] + assert kwargs["embedding_dimensions"] == 3 + assert kwargs["embedding_count"] == 1 + + def test_handle_embedding_async_batch_input(self) -> None: + from openlayer.lib.integrations.async_openai_tracer import ( + handle_embedding_async, + ) + + fake = self._fake_response([[0.1, 0.2], [0.3, 0.4]], prompt_tokens=6) + original = AsyncMock(return_value=fake) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace" + ) as mock_add: + asyncio.run( + handle_embedding_async( + original_func=original, + model="text-embedding-3-small", + input=["a", "b"], + ) + ) + + kwargs = mock_add.call_args.kwargs + assert kwargs["inputs"] == {"input": ["a", "b"]} + assert kwargs["output"] == [[0.1, 0.2], [0.3, 0.4]] + assert kwargs["embedding_count"] == 2 + assert kwargs["prompt_tokens"] == 6 + + def test_handle_embedding_async_failure_does_not_break_client(self) -> None: + from openlayer.lib.integrations.async_openai_tracer import ( + handle_embedding_async, + ) + + fake = self._fake_response([[0.0]]) + original = AsyncMock(return_value=fake) + + with patch( + "openlayer.lib.tracing.tracer.add_embedding_step_to_trace", + side_effect=RuntimeError("backend down"), + ): + result = asyncio.run( + handle_embedding_async( + original_func=original, + model="text-embedding-3-small", + input="x", + ) + ) + + assert result is fake + + def test_trace_async_openai_patches_embeddings_create(self) -> None: + import openai + + from openlayer.lib.integrations.async_openai_tracer import trace_async_openai + + client = MagicMock(spec=openai.AsyncOpenAI) + original_create = client.embeddings.create + + traced_client = trace_async_openai(client) + + assert traced_client.embeddings.create is not original_create From 6be00edcd1cc1686455049e719e37c8400ee56aa Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 12:21:17 -0300 Subject: [PATCH 15/17] chore: apply ruff import sort to embedding test files (OPEN-10480) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_async_openai_embedding_integration.py | 2 +- tests/test_openai_embedding_integration.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_async_openai_embedding_integration.py b/tests/test_async_openai_embedding_integration.py index 389794ff..7c7ab2e8 100644 --- a/tests/test_async_openai_embedding_integration.py +++ b/tests/test_async_openai_embedding_integration.py @@ -1,7 +1,7 @@ """Test OpenAI embedding integration (async).""" import asyncio -from unittest.mock import AsyncMock, MagicMock, Mock, patch +from unittest.mock import Mock, AsyncMock, MagicMock, patch class TestOpenAIAsyncEmbedding: diff --git a/tests/test_openai_embedding_integration.py b/tests/test_openai_embedding_integration.py index c57b1ee5..680cd153 100644 --- a/tests/test_openai_embedding_integration.py +++ b/tests/test_openai_embedding_integration.py @@ -1,6 +1,6 @@ """Test OpenAI embedding integration (sync).""" -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import Mock, MagicMock, patch class TestOpenAISyncEmbedding: From 0f1191a3cd13f07d56c277c19b3e6f79ed4586fe Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 13:09:50 -0300 Subject: [PATCH 16/17] chore: silence pyright noise on embedding test files (OPEN-10480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the same file-level pragma already used by test_portkey_integration.py to suppress reportUnknown* and reportMissingParameterType — these come from openlayer.lib.integrations being in pyright's ignore list, which causes imports from there to be typed as Unknown. Per-line pyright ignores added on direct imports of botocore.response and openai, which are not present in the lint job's environment. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_async_openai_embedding_integration.py | 6 +++++- tests/test_bedrock_integration.py | 16 ++++++++++------ tests/test_litellm_integration.py | 16 ++++++++++++---- tests/test_openai_embedding_helpers.py | 4 ++++ tests/test_openai_embedding_integration.py | 6 +++++- 5 files changed, 36 insertions(+), 12 deletions(-) diff --git a/tests/test_async_openai_embedding_integration.py b/tests/test_async_openai_embedding_integration.py index 7c7ab2e8..5af59f18 100644 --- a/tests/test_async_openai_embedding_integration.py +++ b/tests/test_async_openai_embedding_integration.py @@ -1,5 +1,9 @@ """Test OpenAI embedding integration (async).""" +# openlayer.lib.integrations is in pyright's ignore list, so imports get +# unknown/partially unknown types; disable these diagnostics for this test file only. +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false, reportUnknownParameterType=false, reportMissingParameterType=false + import asyncio from unittest.mock import Mock, AsyncMock, MagicMock, patch @@ -88,7 +92,7 @@ def test_handle_embedding_async_failure_does_not_break_client(self) -> None: assert result is fake def test_trace_async_openai_patches_embeddings_create(self) -> None: - import openai + import openai # pyright: ignore[reportMissingImports] from openlayer.lib.integrations.async_openai_tracer import trace_async_openai diff --git a/tests/test_bedrock_integration.py b/tests/test_bedrock_integration.py index 5c6ca3bb..1b53bef5 100644 --- a/tests/test_bedrock_integration.py +++ b/tests/test_bedrock_integration.py @@ -1,5 +1,9 @@ """Test AWS Bedrock integration.""" +# openlayer.lib.integrations is in pyright's ignore list, so imports from bedrock_tracer +# get unknown/partially unknown types; disable these diagnostics for this test file only. +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false, reportUnknownParameterType=false, reportMissingParameterType=false + import io import json from unittest.mock import MagicMock, patch @@ -10,7 +14,7 @@ class TestBedrockChatRegression: def _make_anthropic_response(self, body_dict): """Build a response dict mimicking what bedrock-runtime returns.""" - from botocore.response import StreamingBody + from botocore.response import StreamingBody # pyright: ignore[reportMissingImports] body_bytes = json.dumps(body_dict).encode("utf-8") return {"body": StreamingBody(io.BytesIO(body_bytes), len(body_bytes))} @@ -88,7 +92,7 @@ def test_is_embedding_model_handles_empty_string(self) -> None: def test_traced_invoke_routes_embedding_to_new_handler(self) -> None: """An embedding modelId must call handle_embedding_invoke, not the chat handler.""" - from botocore.response import StreamingBody + from botocore.response import StreamingBody # pyright: ignore[reportMissingImports] from openlayer.lib.integrations.bedrock_tracer import trace_bedrock @@ -120,7 +124,7 @@ class TestBedrockTitanEmbedding: """Titan v1 and v2 embedding requests produce well-formed embedding steps.""" def _titan_response(self, embedding, token_count): - from botocore.response import StreamingBody + from botocore.response import StreamingBody # pyright: ignore[reportMissingImports] body = json.dumps( {"embedding": embedding, "inputTextTokenCount": token_count} @@ -198,7 +202,7 @@ class TestBedrockCohereEmbedding: """Cohere v3 embedding produces a multi-vector batch step.""" def _cohere_response(self, embeddings): - from botocore.response import StreamingBody + from botocore.response import StreamingBody # pyright: ignore[reportMissingImports] body = json.dumps( { @@ -249,7 +253,7 @@ class TestBedrockEmbeddingResilience: """Tracing failures must never break the caller; response body must remain usable.""" def test_embedding_failure_does_not_break_client(self) -> None: - from botocore.response import StreamingBody + from botocore.response import StreamingBody # pyright: ignore[reportMissingImports] from openlayer.lib.integrations.bedrock_tracer import trace_bedrock @@ -275,7 +279,7 @@ def test_embedding_failure_does_not_break_client(self) -> None: assert response["body"].read() == body_bytes def test_embedding_response_body_is_replayable(self) -> None: - from botocore.response import StreamingBody + from botocore.response import StreamingBody # pyright: ignore[reportMissingImports] from openlayer.lib.integrations.bedrock_tracer import trace_bedrock diff --git a/tests/test_litellm_integration.py b/tests/test_litellm_integration.py index b7026255..c01e2372 100644 --- a/tests/test_litellm_integration.py +++ b/tests/test_litellm_integration.py @@ -300,7 +300,9 @@ def test_trace_litellm_patches_embedding(self, mock_litellm: Mock) -> None: @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) def test_handle_embedding_single_input(self) -> None: - from openlayer.lib.integrations.litellm_tracer import handle_embedding + from openlayer.lib.integrations.litellm_tracer import ( + handle_embedding, # pyright: ignore[reportUnknownVariableType] + ) fake_response = Mock() fake_response.model = "text-embedding-3-small" @@ -338,7 +340,9 @@ def test_handle_embedding_single_input(self) -> None: @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) def test_handle_embedding_batch_input(self) -> None: - from openlayer.lib.integrations.litellm_tracer import handle_embedding + from openlayer.lib.integrations.litellm_tracer import ( + handle_embedding, # pyright: ignore[reportUnknownVariableType] + ) fake_response = Mock() fake_response.model = "text-embedding-3-small" @@ -376,7 +380,9 @@ def test_handle_embedding_batch_input(self) -> None: @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) def test_handle_embedding_cost_from_hidden_params(self) -> None: - from openlayer.lib.integrations.litellm_tracer import handle_embedding + from openlayer.lib.integrations.litellm_tracer import ( + handle_embedding, # pyright: ignore[reportUnknownVariableType] + ) fake_response = Mock() fake_response.model = "text-embedding-3-small" @@ -398,7 +404,9 @@ def test_handle_embedding_cost_from_hidden_params(self) -> None: @patch("openlayer.lib.integrations.litellm_tracer.HAVE_LITELLM", True) def test_handle_embedding_failure_does_not_break_client(self) -> None: - from openlayer.lib.integrations.litellm_tracer import handle_embedding + from openlayer.lib.integrations.litellm_tracer import ( + handle_embedding, # pyright: ignore[reportUnknownVariableType] + ) fake_response = Mock() fake_response.model = "text-embedding-3-small" diff --git a/tests/test_openai_embedding_helpers.py b/tests/test_openai_embedding_helpers.py index 5f6bb2d2..75a9c7c2 100644 --- a/tests/test_openai_embedding_helpers.py +++ b/tests/test_openai_embedding_helpers.py @@ -1,5 +1,9 @@ """Test shared OpenAI embedding parsers.""" +# openlayer.lib.integrations is in pyright's ignore list, so imports get +# unknown/partially unknown types; disable these diagnostics for this test file only. +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false, reportUnknownParameterType=false, reportMissingParameterType=false + from unittest.mock import Mock diff --git a/tests/test_openai_embedding_integration.py b/tests/test_openai_embedding_integration.py index 680cd153..7a8577a5 100644 --- a/tests/test_openai_embedding_integration.py +++ b/tests/test_openai_embedding_integration.py @@ -1,5 +1,9 @@ """Test OpenAI embedding integration (sync).""" +# openlayer.lib.integrations is in pyright's ignore list, so imports get +# unknown/partially unknown types; disable these diagnostics for this test file only. +# pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownArgumentType=false, reportUnknownParameterType=false, reportMissingParameterType=false + from unittest.mock import Mock, MagicMock, patch @@ -87,7 +91,7 @@ def test_handle_embedding_failure_does_not_break_client(self) -> None: def test_trace_openai_patches_embeddings_create(self) -> None: """After trace_openai, client.embeddings.create is replaced.""" - import openai + import openai # pyright: ignore[reportMissingImports] from openlayer.lib.integrations.openai_tracer import trace_openai From 5c4765b8dbc78a3929a41b04ecbb41e666e56dd3 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 28 Apr 2026 13:37:02 -0300 Subject: [PATCH 17/17] refactor: simplify embedding tracer handlers (OPEN-10480) Code review findings addressed: - Move per-call imports of _openai_embedding_common to module-level (was in hot path of every embedding call). - Extract build_embedding_step_kwargs into _openai_embedding_common so that sync and async OpenAI handlers each become ~10 lines instead of ~50, and LiteLLM reuses the same kwargs assembly. - Drop LiteLLM's local _parse_embedding_response and _get_embedding_model_parameters; both now delegate to the shared helpers (LiteLLM-specific timeout/api_base/api_version/cost/metadata are layered on top of the common kwargs). - Type Bedrock _parse_embedding_output return as Tuple[Union[List[float], List[List[float]]], int, int] instead of bare tuple. Net: -34 lines across the 5 touched source files. Tests unchanged, all 77 embedding tests + 448 lib tests still green. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../integrations/_openai_embedding_common.py | 55 +++++++++++- .../lib/integrations/async_openai_tracer.py | 44 +++------ .../lib/integrations/bedrock_tracer.py | 4 +- .../lib/integrations/litellm_tracer.py | 89 ++++++------------- .../lib/integrations/openai_tracer.py | 44 +++------ 5 files changed, 101 insertions(+), 135 deletions(-) diff --git a/src/openlayer/lib/integrations/_openai_embedding_common.py b/src/openlayer/lib/integrations/_openai_embedding_common.py index 9660611c..429b6275 100644 --- a/src/openlayer/lib/integrations/_openai_embedding_common.py +++ b/src/openlayer/lib/integrations/_openai_embedding_common.py @@ -1,19 +1,21 @@ -"""Shared parsing helpers for OpenAI sync + async embedding tracers.""" +"""Shared parsing helpers for OpenAI-shaped embedding tracers (OpenAI, AsyncOpenAI, LiteLLM).""" -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union def parse_embedding_response( response: Any, ) -> Tuple[Union[List[float], List[List[float]]], int, int]: - """Extract (embeddings, dimensions, count) from an OpenAI EmbeddingResponse. + """Extract (embeddings, dimensions, count) from an OpenAI-shaped EmbeddingResponse. For a single input, returns the vector directly. For a batch, returns a list of vectors. """ try: data = getattr(response, "data", None) - if data is None: + if data is None and isinstance(response, dict): + data = response.get("data", []) + if not data: return [], 0, 0 embeddings = [ item["embedding"] if isinstance(item, dict) else item.embedding @@ -35,3 +37,48 @@ def get_embedding_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: "encoding_format": kwargs.get("encoding_format"), "user": kwargs.get("user"), } + + +def build_embedding_step_kwargs( + response: Any, + call_kwargs: Dict[str, Any], + start_time: float, + end_time: float, + *, + name: str, + provider: str, + inference_id: Optional[str] = None, +) -> Dict[str, Any]: + """Build the kwargs to pass to ``tracer.add_embedding_step_to_trace``. + + Common boilerplate for OpenAI-shaped responses (OpenAI sync/async, LiteLLM). + Callers may layer extra fields (cost, extra_metadata, model_parameters) on + top of the returned dict before invoking the tracer helper. + """ + model_name = getattr(response, "model", call_kwargs.get("model", "unknown")) + embeddings, dim, count = parse_embedding_response(response) + usage = getattr(response, "usage", None) + prompt_tokens = getattr(usage, "prompt_tokens", 0) if usage else 0 + total_tokens = getattr(usage, "total_tokens", prompt_tokens) if usage else prompt_tokens + + return { + "name": name, + "end_time": end_time, + "inputs": {"input": call_kwargs.get("input")}, + "output": embeddings, + "latency": (end_time - start_time) * 1000, + "tokens": total_tokens, + "prompt_tokens": prompt_tokens, + "model": model_name, + "model_parameters": get_embedding_model_parameters(call_kwargs), + "embedding_dimensions": dim, + "embedding_count": count, + "raw_output": ( + response.model_dump() + if hasattr(response, "model_dump") + else str(response) + ), + "provider": provider, + "id": inference_id, + "metadata": {"provider": provider}, + } diff --git a/src/openlayer/lib/integrations/async_openai_tracer.py b/src/openlayer/lib/integrations/async_openai_tracer.py index d9803255..59369220 100644 --- a/src/openlayer/lib/integrations/async_openai_tracer.py +++ b/src/openlayer/lib/integrations/async_openai_tracer.py @@ -17,6 +17,7 @@ import openai from ..tracing import tracer +from ._openai_embedding_common import build_embedding_step_kwargs from .openai_tracer import ( get_model_parameters, create_trace_args, @@ -725,46 +726,21 @@ async def handle_embedding_async( **kwargs, ) -> Any: """Trace an async AsyncOpenAI client.embeddings.create() call.""" - from ._openai_embedding_common import ( - get_embedding_model_parameters as _get_embedding_model_parameters, - ) - from ._openai_embedding_common import ( - parse_embedding_response as _parse_embedding_response, - ) - start_time = time.time() response = await original_func(*args, **kwargs) end_time = time.time() try: - model_name = getattr(response, "model", kwargs.get("model", "unknown")) - embeddings, dim, count = _parse_embedding_response(response) - usage = getattr(response, "usage", None) - prompt_tokens = getattr(usage, "prompt_tokens", 0) if usage else 0 - total_tokens = ( - getattr(usage, "total_tokens", prompt_tokens) if usage else prompt_tokens - ) - tracer.add_embedding_step_to_trace( - name="OpenAI Embedding", - end_time=end_time, - inputs={"input": kwargs.get("input")}, - output=embeddings, - latency=(end_time - start_time) * 1000, - tokens=total_tokens, - prompt_tokens=prompt_tokens, - model=model_name, - model_parameters=_get_embedding_model_parameters(kwargs), - embedding_dimensions=dim, - embedding_count=count, - raw_output=( - response.model_dump() - if hasattr(response, "model_dump") - else str(response) - ), - provider="OpenAI", - id=inference_id, - metadata={"provider": "OpenAI"}, + **build_embedding_step_kwargs( + response, + kwargs, + start_time, + end_time, + name="OpenAI Embedding", + provider="OpenAI", + inference_id=inference_id, + ) ) except Exception as e: logger.error( diff --git a/src/openlayer/lib/integrations/bedrock_tracer.py b/src/openlayer/lib/integrations/bedrock_tracer.py index 6fb4cd06..a7834d65 100644 --- a/src/openlayer/lib/integrations/bedrock_tracer.py +++ b/src/openlayer/lib/integrations/bedrock_tracer.py @@ -5,7 +5,7 @@ import logging import time from functools import wraps -from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union from botocore.response import StreamingBody @@ -237,7 +237,7 @@ def _parse_embedding_input(body_data: Dict[str, Any], model_id: str) -> Dict[str def _parse_embedding_output( response_data: Dict[str, Any], model_id: str -) -> tuple: +) -> Tuple[Union[List[float], List[List[float]]], int, int]: """Returns (embeddings, dimensions, count).""" if model_id.startswith("amazon.titan-embed"): emb = response_data.get("embedding", []) diff --git a/src/openlayer/lib/integrations/litellm_tracer.py b/src/openlayer/lib/integrations/litellm_tracer.py index e438adef..00fa411f 100644 --- a/src/openlayer/lib/integrations/litellm_tracer.py +++ b/src/openlayer/lib/integrations/litellm_tracer.py @@ -18,6 +18,7 @@ from ..tracing import tracer from ..tracing import enums as tracer_enums +from ._openai_embedding_common import build_embedding_step_kwargs logger = logging.getLogger(__name__) @@ -367,40 +368,38 @@ def handle_embedding( try: model_name = kwargs.get("model", getattr(response, "model", "unknown")) provider = detect_provider_from_response(response, model_name) - embeddings, dim, count = _parse_embedding_response(response) - usage_data = extract_usage_from_response(response) extra_metadata = extract_litellm_metadata(response, model_name) - cost = extra_metadata.get("cost", None) - - prompt_tokens = usage_data.get("prompt_tokens") or 0 - total_tokens = usage_data.get("total_tokens") or prompt_tokens + usage_data = extract_usage_from_response(response) - tracer.add_embedding_step_to_trace( + step_kwargs = build_embedding_step_kwargs( + response, + kwargs, + start_time, + end_time, name="LiteLLM Embedding", - end_time=end_time, - inputs={"input": kwargs.get("input")}, - output=embeddings, - latency=(end_time - start_time) * 1000, - tokens=total_tokens, - prompt_tokens=prompt_tokens, - model=model_name, - model_parameters=_get_embedding_model_parameters(kwargs), - embedding_dimensions=dim, - embedding_count=count, - raw_output=( - response.model_dump() - if hasattr(response, "model_dump") - else str(response) - ), provider=provider, - cost=cost, - id=inference_id, - metadata={ - "provider": provider, - "litellm_model": model_name, - **extra_metadata, - }, + inference_id=inference_id, ) + + # LiteLLM-specific overlays: usage uses LiteLLM's normalized dict, extra + # connection params, response cost, and provider metadata. + prompt_tokens = usage_data.get("prompt_tokens") or 0 + step_kwargs["prompt_tokens"] = prompt_tokens + step_kwargs["tokens"] = usage_data.get("total_tokens") or prompt_tokens + step_kwargs["model_parameters"] = { + **step_kwargs["model_parameters"], + "timeout": kwargs.get("timeout"), + "api_base": kwargs.get("api_base"), + "api_version": kwargs.get("api_version"), + } + step_kwargs["cost"] = extra_metadata.get("cost", None) + step_kwargs["metadata"] = { + **step_kwargs["metadata"], + "litellm_model": model_name, + **extra_metadata, + } + + tracer.add_embedding_step_to_trace(**step_kwargs) except Exception as e: logger.error( "Failed to trace the LiteLLM embedding request with Openlayer. %s", e @@ -409,38 +408,6 @@ def handle_embedding( return response -def _parse_embedding_response(response: Any) -> tuple: - """Returns (embeddings, dimensions, count). Mirrors OpenAI EmbeddingResponse.""" - try: - data = getattr(response, "data", None) - if data is None and isinstance(response, dict): - data = response.get("data", []) - if not data: - return [], 0, 0 - embeddings = [ - item["embedding"] if isinstance(item, dict) else item.embedding - for item in data - ] - if not embeddings: - return [], 0, 0 - if len(embeddings) == 1: - return embeddings[0], len(embeddings[0]), 1 - return embeddings, len(embeddings[0]), len(embeddings) - except Exception: - return [], 0, 0 - - -def _get_embedding_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: - return { - "dimensions": kwargs.get("dimensions"), - "encoding_format": kwargs.get("encoding_format"), - "user": kwargs.get("user"), - "timeout": kwargs.get("timeout"), - "api_base": kwargs.get("api_base"), - "api_version": kwargs.get("api_version"), - } - - def get_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: """Gets the model parameters from the kwargs.""" return { diff --git a/src/openlayer/lib/integrations/openai_tracer.py b/src/openlayer/lib/integrations/openai_tracer.py index 64bf606b..c64c70ae 100644 --- a/src/openlayer/lib/integrations/openai_tracer.py +++ b/src/openlayer/lib/integrations/openai_tracer.py @@ -27,6 +27,7 @@ ImageContent, TextContent, ) +from ._openai_embedding_common import build_embedding_step_kwargs logger = logging.getLogger(__name__) @@ -1635,46 +1636,21 @@ def handle_embedding( **kwargs, ) -> Any: """Trace a sync OpenAI client.embeddings.create() call.""" - from ._openai_embedding_common import ( - get_embedding_model_parameters as _get_embedding_model_parameters, - ) - from ._openai_embedding_common import ( - parse_embedding_response as _parse_embedding_response, - ) - start_time = time.time() response = original_func(*args, **kwargs) end_time = time.time() try: - model_name = getattr(response, "model", kwargs.get("model", "unknown")) - embeddings, dim, count = _parse_embedding_response(response) - usage = getattr(response, "usage", None) - prompt_tokens = getattr(usage, "prompt_tokens", 0) if usage else 0 - total_tokens = ( - getattr(usage, "total_tokens", prompt_tokens) if usage else prompt_tokens - ) - tracer.add_embedding_step_to_trace( - name="OpenAI Embedding", - end_time=end_time, - inputs={"input": kwargs.get("input")}, - output=embeddings, - latency=(end_time - start_time) * 1000, - tokens=total_tokens, - prompt_tokens=prompt_tokens, - model=model_name, - model_parameters=_get_embedding_model_parameters(kwargs), - embedding_dimensions=dim, - embedding_count=count, - raw_output=( - response.model_dump() - if hasattr(response, "model_dump") - else str(response) - ), - provider="OpenAI", - id=inference_id, - metadata={"provider": "OpenAI"}, + **build_embedding_step_kwargs( + response, + kwargs, + start_time, + end_time, + name="OpenAI Embedding", + provider="OpenAI", + inference_id=inference_id, + ) ) except Exception as e: logger.error(