braintrustdata · Abhijeet Prasad (AbhiPrasad) · May 21, 2026
diff --git a/py/noxfile.py b/py/noxfile.py
@@ -534,6 +534,21 @@ def test_mistral(session, version):
     _run_tests(session, f"{INTEGRATION_DIR}/mistral/test_mistral.py", version=version)
 
 
+HUGGINGFACE_HUB_VERSIONS = _get_matrix_versions("huggingface-hub")
+
+
+@nox.session()
+@nox.parametrize("version", HUGGINGFACE_HUB_VERSIONS, ids=HUGGINGFACE_HUB_VERSIONS)
+def test_huggingface_hub(session, version):
+    """Test the native HuggingFace Hub SDK integration."""
+    _install_test_deps(session)
+    _install_matrix_dep(session, "huggingface-hub", version)
+    # Installs numpy (needed by ``InferenceClient.feature_extraction``) and
+    # aiohttp (async client on huggingface_hub<1.0); see pyproject.toml.
+    _install_group_locked(session, "test-huggingface-hub")
+    _run_tests(session, f"{INTEGRATION_DIR}/huggingface_hub/test_huggingface_hub.py", version=version)
+
+
 TEMPORAL_VERSIONS = _get_matrix_versions("temporalio")
 
 

diff --git a/py/pyproject.toml b/py/pyproject.toml
@@ -179,6 +179,18 @@ test-llamaindex = [
     {include-group = "test"},
 ]
 
+test-huggingface-hub = [
+    {include-group = "test"},
+    # numpy is required by huggingface_hub.InferenceClient.feature_extraction
+    # but is not declared as an install_requires dep upstream.
+    "numpy",
+    # huggingface_hub<1.0 implements AsyncInferenceClient on top of aiohttp;
+    # newer versions ship with httpx already. Listing aiohttp here keeps the
+    # 0.32.0 floor working without leaking the requirement onto the rest of
+    # the test environment.
+    "aiohttp",
+]
+
 test-cli = [
     {include-group = "test"},
     "httpx==0.28.1",
@@ -227,6 +239,8 @@ lint = [
     "openai",
     "openai-agents",
     "openrouter",
+    "huggingface-hub",
+    "numpy",
     "strands-agents",
     "temporalio",
     "pydantic-ai>=1.10.0",
@@ -400,6 +414,14 @@ latest = "openrouter==0.9.1"
 latest = "mistralai==2.4.5"
 "1.12.4" = "mistralai==1.12.4"
 
+[tool.braintrust.matrix.huggingface-hub]
+# Floor pinned to 0.32.0: the earliest release that exposes the
+# ``provider="auto"`` routing mode the integration relies on for
+# multi-provider use, while still keeping the same chat_completion /
+# text_generation / feature_extraction / sentence_similarity method names.
+latest = "huggingface-hub==1.15.0"
+"0.32.0" = "huggingface-hub==0.32.0"
+
 [tool.braintrust.matrix.temporalio]
 latest = "temporalio==1.27.2"
 "1.20.0" = "temporalio==1.20.0"
@@ -442,6 +464,7 @@ claude_agent_sdk = ["claude-agent-sdk"]
 crewai = ["crewai"]
 dspy = ["dspy"]
 google_genai = ["google-genai"]
+huggingface_hub = ["huggingface-hub"]
 langchain = ["langchain-core"]
 litellm = ["litellm"]
 livekit_agents = ["livekit-agents"]
@@ -469,6 +492,7 @@ google-genai = "google.genai"
 litellm = "litellm"
 livekit-agents = "livekit.agents"
 mistralai = "mistralai"
+huggingface-hub = "huggingface_hub"
 openai = "openai"
 openai-agents = "agents"
 openrouter = "openrouter"

diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
@@ -18,6 +18,7 @@
     CrewAIIntegration,
     DSPyIntegration,
     GoogleGenAIIntegration,
+    HuggingFaceHubIntegration,
     LangChainIntegration,
     LiteLLMIntegration,
     LiveKitAgentsIntegration,
@@ -58,6 +59,7 @@ def auto_instrument(
     google_genai: bool = True,
     openrouter: bool = True,
     mistral: bool = True,
+    huggingface_hub: bool = True,
     agno: bool = True,
     agentscope: bool = True,
     claude_agent_sdk: bool = True,
@@ -90,6 +92,7 @@ def auto_instrument(
         google_genai: Enable Google GenAI instrumentation (default: True)
         openrouter: Enable OpenRouter instrumentation (default: True)
         mistral: Enable Mistral instrumentation (default: True)
+        huggingface_hub: Enable HuggingFace Hub instrumentation (default: True)
         agno: Enable Agno instrumentation (default: True)
         agentscope: Enable AgentScope instrumentation (default: True)
         claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
@@ -165,6 +168,8 @@ def auto_instrument(
         results["openrouter"] = _instrument_integration(OpenRouterIntegration)
     if mistral:
         results["mistral"] = _instrument_integration(MistralIntegration)
+    if huggingface_hub:
+        results["huggingface_hub"] = _instrument_integration(HuggingFaceHubIntegration)
     if agno:
         results["agno"] = _instrument_integration(AgnoIntegration)
     if agentscope:

diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py
@@ -8,6 +8,7 @@
 from .crewai import CrewAIIntegration
 from .dspy import DSPyIntegration
 from .google_genai import GoogleGenAIIntegration
+from .huggingface_hub import HuggingFaceHubIntegration
 from .langchain import LangChainIntegration
 from .litellm import LiteLLMIntegration
 from .livekit_agents import LiveKitAgentsIntegration
@@ -32,6 +33,7 @@
     "CrewAIIntegration",
     "DSPyIntegration",
     "GoogleGenAIIntegration",
+    "HuggingFaceHubIntegration",
     "LiteLLMIntegration",
     "LiveKitAgentsIntegration",
     "LangChainIntegration",

diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_huggingface_hub.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_huggingface_hub.py
@@ -0,0 +1,43 @@
+"""Test auto_instrument for HuggingFace Hub."""
+
+import os
+
+
+# Dummy token must start with ``hf_`` so the HuggingFace SDK accepts it for
+# Hub-routed provider calls (validated locally before any HTTP request).
+os.environ.setdefault("HF_TOKEN", "hf_test_dummy_api_key_for_vcr_tests")
+
+from braintrust.auto import auto_instrument
+from braintrust.integrations.test_utils import autoinstrument_test_context
+from huggingface_hub import InferenceClient
+
+
+results = auto_instrument()
+assert results.get("huggingface_hub") is True
+
+results2 = auto_instrument()  # second call: must still report success
+assert results2.get("huggingface_hub") is True
+
+
+CHAT_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
+
+
+with autoinstrument_test_context("test_auto_huggingface_hub", integration="huggingface_hub") as memory_logger:
+    # ``provider="cerebras"`` hosts ``meta-llama/Llama-3.1-8B-Instruct`` across
+    # the matrix; ``hf-inference`` no longer hosts most conversational checkpoints.
+    client = InferenceClient(model=CHAT_MODEL, provider="cerebras", token=os.environ["HF_TOKEN"])
+    response = client.chat_completion(
+        messages=[{"role": "user", "content": "Say hi in one word."}],
+        max_tokens=10,
+    )
+    assert response.choices
+    assert response.choices[0].message.role == "assistant"
+
+    spans = memory_logger.pop()
+    assert len(spans) == 1, f"Expected 1 span, got {len(spans)}"
+    span = spans[0]
+    # User-supplied ``provider`` overrides the default "huggingface" identity.
+    assert span["metadata"]["provider"] == "cerebras"
+    assert span["span_attributes"]["name"] == "huggingface.chat_completion"
+
+print("SUCCESS")  # presumably the marker the calling test checks for; verify against caller
diff --git a/py/src/braintrust/integrations/huggingface_hub/__init__.py b/py/src/braintrust/integrations/huggingface_hub/__init__.py
@@ -0,0 +1,10 @@
+"""Braintrust integration for the HuggingFace Hub Python SDK."""
+
+from .integration import HuggingFaceHubIntegration
+from .tracing import wrap_huggingface_hub
+
+
+__all__ = [
+    "HuggingFaceHubIntegration",
+    "wrap_huggingface_hub",
+]
diff --git a/...c/braintrust/integrations/huggingface_hub/cassettes/0.32.0/test_auto_huggingface_hub.yaml b/...c/braintrust/integrations/huggingface_hub/cassettes/0.32.0/test_auto_huggingface_hub.yaml
@@ -0,0 +1,163 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      X-Amzn-Trace-Id:
+      - 612753ff-29c0-4031-a93f-02b1c6d24692
+      user-agent:
+      - unknown/None; hf_hub/0.32.0; python/3.14.3
+    method: GET
+    uri: https://huggingface.co/api/models/meta-llama/Llama-3.1-8B-Instruct?expand=inferenceProviderMapping
+  response:
+    body:
+      string: '{"_id":"6698d8a0653e4babe21e1e7d","id":"meta-llama/Llama-3.1-8B-Instruct","inferenceProviderMapping":{"novita":{"status":"live","providerId":"meta-llama/llama-3.1-8b-instruct","task":"conversational","isModelAuthor":false},"cerebras":{"status":"live","providerId":"llama3.1-8b","task":"conversational","isModelAuthor":false},"nscale":{"status":"live","providerId":"meta-llama/Llama-3.1-8B-Instruct","task":"conversational","isModelAuthor":false},"featherless-ai":{"status":"live","providerId":"meta-llama/Meta-Llama-3.1-8B-Instruct","task":"conversational","isModelAuthor":false},"scaleway":{"status":"live","providerId":"llama-3.1-8b-instruct","task":"conversational","isModelAuthor":false},"sambanova":{"status":"error","providerId":"Meta-Llama-3.1-8B-Instruct","task":"conversational","isModelAuthor":false}}}'
+    headers:
+      Access-Control-Allow-Origin:
+      - https://huggingface.co
+      Access-Control-Expose-Headers:
+      - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+      Access-Control-Max-Age:
+      - '86400'
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '812'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Wed, 20 May 2026 18:01:23 GMT
+      ETag:
+      - W/"32c-AuB6sgZTIbkNCeInjqjAzrSGN6U"
+      RateLimit:
+      - '"api";r=980;t=222'
+      RateLimit-Policy:
+      - '"fixed window";"api";q=1000;w=300'
+      Referrer-Policy:
+      - strict-origin-when-cross-origin
+      Vary:
+      - Origin
+      Via:
+      - 1.1 d03af248468c898a111754f0666c2316.cloudfront.net (CloudFront)
+      X-Amz-Cf-Id:
+      - 4WCrHaehFlOWwSsjyrmjjPL8GSMt6pbgFkVHBzAmSGeITCJD5UXyHQ==
+      X-Amz-Cf-Pop:
+      - YTO50-P2
+      X-Cache:
+      - Miss from cloudfront
+      X-Powered-By:
+      - huggingface-moon
+      X-Request-Id:
+      - Root=1-6a0df6f3-602614dd3aedf0224d2485a5;612753ff-29c0-4031-a93f-02b1c6d24692
+      cross-origin-opener-policy:
+      - same-origin
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"role": "user", "content": "Say hi in one word."}], "model":
+      "llama3.1-8b", "max_tokens": 10, "stream": false}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '125'
+      Content-Type:
+      - application/json
+      X-Amzn-Trace-Id:
+      - 06441962-8e36-40da-829c-8f76ca204c40
+      user-agent:
+      - unknown/None; hf_hub/0.32.0; python/3.14.3
+    method: POST
+    uri: https://router.huggingface.co/cerebras/v1/chat/completions
+  response:
+    body:
+      string: '{"id":"chatcmpl-e6cb12a1-9580-4ae5-b75a-aa4740402e6d","choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello.","role":"assistant"}}],"created":1779300084,"model":"llama3.1-8b","system_fingerprint":"fp_f613d2b18eccee549c5f","object":"chat.completion","usage":{"total_tokens":44,"completion_tokens":3,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":41,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1779300084.3064463,"queue_time":8.325e-05,"prompt_time":0.002871066,"completion_time":0.001190025,"total_time":0.006201982498168945}}'
+    headers:
+      Access-Control-Allow-Origin:
+      - '*'
+      Access-Control-Expose-Headers:
+      - X-Repo-Commit,X-Request-Id,X-Error-Code,X-Error-Message,X-Total-Count,ETag,Link,Accept-Ranges,Content-Range,X-Linked-Size,X-Linked-ETag,X-Xet-Hash
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 20 May 2026 18:01:24 GMT
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      Via:
+      - 1.1 1469d4976bc2a36b5840519c9e3dbad6.cloudfront.net (CloudFront)
+      X-Amz-Cf-Id:
+      - NmYnNOX20NQBBfOkXA1R0Kn_6MQ5nP-tVZUOnVijYDWzH2rxZjVZYQ==
+      X-Amz-Cf-Pop:
+      - YTO50-P1
+      X-Cache:
+      - Miss from cloudfront
+      X-Powered-By:
+      - huggingface-moon
+      X-Robots-Tag:
+      - none
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 9fed3f1698536887-IAD
+      cross-origin-opener-policy:
+      - same-origin
+      inference-id:
+      - chatcmpl-e6cb12a1-9580-4ae5-b75a-aa4740402e6d
+      referrer-policy:
+      - strict-origin-when-cross-origin
+      server:
+      - cloudflare
+      set-cookie:
+      - __cf_bm=PdRHzq1Nm0H6U3Y5MuDllSirlbhPgF4FUqP82CamXH8-1779300084.2544014-1.0.1.1-YQsooVKv6m_ucY1RpkxV5jVLBixoDGOKRY6MYm.LJ1z7sFcfdmuUqUebWWfcOJXRgmr4KFY4tCmrktgqfUJXR3Q0yiUd9VWDostaBrjOEO6PwT6WuWmFGwxVRK_gJC1i;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.cerebras.ai; Expires=Wed,
+        20 May 2026 18:31:24 GMT
+      strict-transport-security:
+      - max-age=15552000; includeSubDomains; preload
+      x-content-type-options:
+      - nosniff
+      x-inference-provider:
+      - cerebras
+      x-ratelimit-limit-requests-day:
+      - '2880000'
+      x-ratelimit-limit-requests-hour:
+      - '120000'
+      x-ratelimit-limit-requests-minute:
+      - '2000'
+      x-ratelimit-limit-tokens-day:
+      - '9223372036854775807'
+      x-ratelimit-limit-tokens-hour:
+      - '9223372036854775807'
+      x-ratelimit-limit-tokens-minute:
+      - '9223372036854775807'
+      x-ratelimit-remaining-requests-day:
+      - '2879999'
+      x-ratelimit-remaining-requests-hour:
+      - '119999'
+      x-ratelimit-remaining-requests-minute:
+      - '1999'
+      x-ratelimit-remaining-tokens-day:
+      - '9223372036854775807'
+      x-ratelimit-remaining-tokens-hour:
+      - '9223372036854775807'
+      x-ratelimit-remaining-tokens-minute:
+      - '9223372036854775807'
+      x-request-id:
+      - 9fed3f1698536887-IAD
+    status:
+      code: 200
+      message: OK
+version: 1