diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index 435ba99fef5..1cc608939d7 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -84,6 +84,8 @@ GEN_AI_IDENTITY_ENDPOINT = os.environ.get("GEN_AI_IDENTITY_ENDPOINT") or None GEN_AI_CLIENT_ID = os.environ.get("GEN_AI_CLIENT_ID") or None GEN_AI_CLIENT_SECRET = os.environ.get("GEN_AI_CLIENT_SECRET") or None +GEN_AI_ACCOUNT_ID = os.environ.get("GEN_AI_ACCOUNT_ID") or None +GEN_AI_TENANT_ID = os.environ.get("GEN_AI_TENANT_ID") or None # Number of tokens from chat history to include at maximum # 3000 should be enough context regardless of use, no need to include as much as possible # as this drives up the cost unnecessarily diff --git a/backend/danswer/llm/custom_llm.py b/backend/danswer/llm/custom_llm.py index 8ab91afba0e..5fdd3bc57d5 100644 --- a/backend/danswer/llm/custom_llm.py +++ b/backend/danswer/llm/custom_llm.py @@ -8,11 +8,13 @@ from langchain_core.messages import BaseMessage from requests import Timeout +from danswer.configs.model_configs import GEN_AI_ACCOUNT_ID from danswer.configs.model_configs import GEN_AI_API_VERSION from danswer.configs.model_configs import GEN_AI_CLIENT_ID from danswer.configs.model_configs import GEN_AI_CLIENT_SECRET from danswer.configs.model_configs import GEN_AI_IDENTITY_ENDPOINT from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS +from danswer.configs.model_configs import GEN_AI_TENANT_ID from danswer.llm.interfaces import LLM from danswer.llm.interfaces import LLMConfig from danswer.llm.interfaces import ToolChoiceOptions @@ -66,10 +68,12 @@ def __init__( api_key: str | None, timeout: int, endpoint: str - | None = "https://alpha.uipath.com/llmgateway_/openai/deployments/gpt-4o-mini-2024-07-18/chat/completions?api-version=2024-06-01", + | None = "https://alpha.uipath.com/{account_id}/{tenant_id}/llmgateway_/api/raw/vendor/openai/model/gpt-4o-2024-11-20/completions", identity_url: str | None = GEN_AI_IDENTITY_ENDPOINT, client_id: str | None = GEN_AI_CLIENT_ID, client_secret: str | None = GEN_AI_CLIENT_SECRET, + account_id: str | None = GEN_AI_ACCOUNT_ID, + tenant_id: str | None = GEN_AI_TENANT_ID, max_output_tokens: int = int(GEN_AI_MAX_OUTPUT_TOKENS), api_version: str | None = GEN_AI_API_VERSION, ): @@ -97,6 +101,18 @@ def __init__( "client_secret for the model server." ) + if not account_id: + raise ValueError( + "Cannot point Danswer to a custom LLM server without providing the " + "account_id (GEN_AI_ACCOUNT_ID) for the model server." + ) + + if not tenant_id: + raise ValueError( + "Cannot point Danswer to a custom LLM server without providing the " + "tenant_id (GEN_AI_TENANT_ID) for the model server." + ) + # TODO: implement api versions for endpoints and add those to model # if not api_version: # raise ValueError( @@ -107,7 +123,9 @@ def __init__( self._identity_url = identity_url self._client_id = client_id self._client_secret = client_secret - self._endpoint = endpoint + self._account_id = account_id + self._tenant_id = tenant_id + self._endpoint = endpoint.format(account_id=account_id, tenant_id=tenant_id) self._max_output_tokens = max_output_tokens self._timeout = timeout self.token = self._get_token() @@ -123,11 +141,13 @@ def __init__( def _execute(self, input: LanguageModelInput) -> AIMessage: headers = { "Content-Type": "application/json", - "X-UiPath-LlmGateway-RequestedFeature": "ChatWithAssistant", - "X-UiPath-LlmGateway-RequestingFeature": "ChatWithAssistant", + "Authorization": "Bearer " + self.token, "X-UiPath-LlmGateway-RequestingProduct": "darwin", + "X-UiPath-LlmGateway-RequestingFeature": "ChatWithAssistant", + "X-UiPath-LlmGateway-ApiFlavor": "chat-completions", + "X-UiPath-LlmGateway-ApiVersion": "2024-10-21", "X-UiPath-LlmGateway-TimeoutSeconds": "60", - "Authorization": "Bearer " + self.token, + "X-UIPATH-STREAMING-ENABLED": "false", } # print(f"Input: {input}") diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 99e8bb14573..5a7136c2eb4 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -42,6 +42,8 @@ services: - GEN_AI_IDENTITY_ENDPOINT=https://alpha.uipath.com/identity_/connect/token - GEN_AI_CLIENT_ID=${GEN_AI_CLIENT_ID:-} - GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-} + - GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-} + - GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-} - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-} - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} @@ -129,6 +131,8 @@ services: - GEN_AI_IDENTITY_ENDPOINT=https://alpha.uipath.com/identity_/connect/token - GEN_AI_CLIENT_ID=${GEN_AI_CLIENT_ID:-} - GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-} + - GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-} + - GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-} - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-} - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} diff --git a/deployment/docker_compose/docker-compose.local.yml b/deployment/docker_compose/docker-compose.local.yml index 453de077d04..85c206622e7 100644 --- a/deployment/docker_compose/docker-compose.local.yml +++ b/deployment/docker_compose/docker-compose.local.yml @@ -43,6 +43,8 @@ services: - GEN_AI_IDENTITY_ENDPOINT=https://alpha.uipath.com/identity_/connect/token - GEN_AI_CLIENT_ID=${GEN_AI_CLIENT_ID:-} - GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-} + - GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-} + - GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-} - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-} - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} @@ -132,6 +134,8 @@ services: - GEN_AI_IDENTITY_ENDPOINT=https://alpha.uipath.com/identity_/connect/token - GEN_AI_CLIENT_ID=${GEN_AI_CLIENT_ID:-} - GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-} + - GEN_AI_ACCOUNT_ID=${GEN_AI_ACCOUNT_ID:-} + - GEN_AI_TENANT_ID=${GEN_AI_TENANT_ID:-} - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-} - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}