-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathllm_runtime.py
More file actions
30 lines (26 loc) · 865 Bytes
/
llm_runtime.py
File metadata and controls
30 lines (26 loc) · 865 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import threading
import requests
# Name of the model that ensure_model() looks for among the model names
# reported by the server's /api/tags endpoint.
MODEL_NAME = "mistral"
# Base URL that the /api/tags request is built from (loopback address, port 11434).
OLLAMA_BASE_URL = "http://127.0.0.1:11434"
# Set to True by ensure_model() once the model has been found; while it is
# True, ensure_model() returns immediately without issuing a request.
model_verified = False
# Held by ensure_model() around the availability check and the write to
# model_verified.
lock = threading.Lock()
def ensure_model() -> None:
    """Verify that MODEL_NAME is available in Ollama.

    Queries ``{OLLAMA_BASE_URL}/api/tags`` and sets the module-level
    ``model_verified`` flag once the model is found. After the first
    successful check the function is a no-op. If the server is unreachable,
    answers with an error status, or returns an unexpected payload, the flag
    is left unset and the check is repeated on the next call.

    This function is best-effort: it does not raise on network or payload
    problems, so the caller's own request is what surfaces the failure.
    """
    global model_verified

    # Fast path: skip the lock entirely once verification has succeeded.
    if model_verified:
        return

    with lock:
        # Re-check under the lock: another thread may have completed the
        # verification while this one was waiting.
        if model_verified:
            return

        # Keep the try body limited to the calls that can fail for
        # environmental reasons (connection, timeout, invalid JSON).
        try:
            response = requests.get(
                f"{OLLAMA_BASE_URL}/api/tags",
                timeout=5,
            )
            payload = response.json() if response.ok else None
        except (requests.RequestException, ValueError):
            # Ollama not reachable or response not JSON - let the caller's
            # request fail naturally.
            return

        if not isinstance(payload, dict):
            return
        entries = payload.get("models")
        if not isinstance(entries, list):
            return

        for entry in entries:
            name = entry.get("name") if isinstance(entry, dict) else None
            if not isinstance(name, str):
                continue
            # Names are reported as "<model>:<tag>". Compare either the full
            # name (when MODEL_NAME itself carries a tag) or the part before
            # the tag. A plain substring test would wrongly accept unrelated
            # models such as "dolphin-mistral" for "mistral".
            if name == MODEL_NAME or name.split(":", 1)[0] == MODEL_NAME:
                model_verified = True
                return