From f9b1fd1ca0d8e8ef43606d0fdb9a9e8118b1aad5 Mon Sep 17 00:00:00 2001 From: fern-support <126544928+fern-support@users.noreply.github.com> Date: Tue, 5 May 2026 12:35:02 -0400 Subject: [PATCH] fix: handle missing content-length header in _get_tokenizer_config_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a server uses chunked transfer encoding, neither content-length nor x-goog-stored-content-length headers are present, leaving size as None. The subsequent int() cast then raises a TypeError. Add an explicit None check that raises a ValueError instead, which is caught gracefully by the existing try/except in both callers (get_hf_tokenizer and async_get_hf_tokenizer) — tokenizer download proceeds normally. Fixes: https://github.com/cohere-ai/cohere-python/issues/762 --- src/cohere/manually_maintained/tokenizers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cohere/manually_maintained/tokenizers.py b/src/cohere/manually_maintained/tokenizers.py index c2681c62f..add92b2b3 100644 --- a/src/cohere/manually_maintained/tokenizers.py +++ b/src/cohere/manually_maintained/tokenizers.py @@ -99,4 +99,6 @@ def _get_tokenizer_config_size(tokenizer_url: str) -> float: if size: break + if size is None: + raise ValueError("No content-length header found (server may use chunked transfer encoding)") return round(int(typing.cast(int, size)) / 1024 / 1024, 2)