From 394afb31e3ba0573c29a25bd268c7a08d4ce693a Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 2 Jun 2026 00:55:57 +0530 Subject: [PATCH 1/3] feat: add LiteLLM as LLM provider --- app/models/factory.py | 11 ++++++ app/models/litellm_model.py | 74 +++++++++++++++++++++++++++++++++++++ app/ui.py | 13 +++++++ requirements.txt | 1 + 4 files changed, 99 insertions(+) create mode 100644 app/models/litellm_model.py diff --git a/app/models/factory.py b/app/models/factory.py index 2057be3..361b9a8 100644 --- a/app/models/factory.py +++ b/app/models/factory.py @@ -3,6 +3,15 @@ from models.gpt5 import GPT5 from models.openai_computer_use import OpenAIComputerUse from models.gemini import Gemini +from models.litellm_model import LiteLLMModel + +LITELLM_PREFIXES = ( + 'anthropic/', 'bedrock/', 'vertex_ai/', 'azure/', 'azure_ai/', + 'huggingface/', 'ollama/', 'cohere/', 'mistral/', 'groq/', + 'together_ai/', 'fireworks_ai/', 'deepseek/', 'perplexity/', + 'replicate/', 'ai21/', 'cloudflare/', 'cerebras/', 'sambanova/', + 'nvidia_nim/', 'xai/', 'litellm/', +) class ModelFactory: @@ -19,6 +28,8 @@ def create_model(model_name, *args): return GPT4v(model_name, *args) elif model_name.startswith("gemini"): return Gemini(model_name, *args[1:]) + elif model_name.startswith(LITELLM_PREFIXES): + return LiteLLMModel(model_name, *args) else: # Llama/Llava models will work with the standard code I wrote for GPT4V without the assitant mode features of gpt4o return GPT4v(model_name, *args) diff --git a/app/models/litellm_model.py b/app/models/litellm_model.py new file mode 100644 index 0000000..724b64a --- /dev/null +++ b/app/models/litellm_model.py @@ -0,0 +1,74 @@ +import json +from typing import Any + +import litellm + +from models.model import Model +from utils.screen import Screen + + +class LiteLLMModel(Model): + def __init__(self, model_name, base_url, api_key, context): + self.model_name = model_name + self.base_url = base_url + self.api_key = api_key + self.context = context + + def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]: + message = self.format_user_request_for_llm(original_user_request, step_num) + llm_response = self.send_message_to_llm(message) + json_instructions: dict[str, Any] = self.convert_llm_response_to_json_instructions(llm_response) + return json_instructions + + def format_user_request_for_llm(self, original_user_request: str, step_num: int) -> list[dict[str, Any]]: + base64_img: str = Screen().get_screenshot_in_base64() + request_data: str = json.dumps({ + 'original_user_request': original_user_request, + 'step_num': step_num + }) + + message = [ + {'type': 'text', 'text': self.context + request_data}, + {'type': 'image_url', + 'image_url': { + 'url': f'data:image/jpeg;base64,{base64_img}' + } + } + ] + + return message + + def send_message_to_llm(self, message) -> Any: + kwargs = { + 'model': self.model_name, + 'messages': [ + { + 'role': 'user', + 'content': message, + } + ], + 'max_tokens': 800, + 'drop_params': True, + } + + if self.api_key: + kwargs['api_key'] = self.api_key + if self.base_url: + kwargs['api_base'] = self.base_url + + response = litellm.completion(**kwargs) + return response + + def convert_llm_response_to_json_instructions(self, llm_response) -> dict[str, Any]: + llm_response_data: str = llm_response.choices[0].message.content.strip() + + start_index = llm_response_data.find('{') + end_index = llm_response_data.rfind('}') + + try: + json_response = json.loads(llm_response_data[start_index:end_index + 1].strip()) + except Exception as e: + print(f'Error while parsing JSON response - {e}') + json_response = {} + + return json_response diff --git a/app/ui.py b/app/ui.py index 375fed8..6aea837 100644 --- a/app/ui.py +++ b/app/ui.py @@ -69,6 +69,13 @@ def create_widgets(self) -> None: ('Gemini gemini-3-flash-preview', 'gemini-3-flash-preview'), ] + litellm_models = [ + ('LiteLLM: anthropic/claude-sonnet-4-20250514', 'anthropic/claude-sonnet-4-20250514'), + ('LiteLLM: anthropic/claude-haiku-4-5-20251001', 'anthropic/claude-haiku-4-5-20251001'), + ('LiteLLM: bedrock/anthropic.claude-sonnet-4-20250514-v1:0', 'bedrock/anthropic.claude-sonnet-4-20250514-v1:0'), + ('LiteLLM: vertex_ai/gemini-2.5-flash', 'vertex_ai/gemini-2.5-flash'), + ] + deprecated_models = [ ('GPT-4o (Medium-Accurate, Medium-Fast)', 'gpt-4o'), ('GPT-4o-mini (Cheapest, Fastest)', 'gpt-4o-mini'), @@ -93,6 +100,12 @@ def create_widgets(self) -> None: ttk.Radiobutton(radio_frame, text=text, value=value, variable=self.model_var, bootstyle="info").pack( anchor=ttk.W, pady=5) + ttk.Separator(radio_frame, orient='horizontal').pack(fill='x', pady=8) + + for text, value in litellm_models: + ttk.Radiobutton(radio_frame, text=text, value=value, variable=self.model_var, bootstyle="info").pack( + anchor=ttk.W, pady=5) + ttk.Separator(radio_frame, orient='horizontal').pack(fill='x', pady=10) self.deprecated_expanded = False diff --git a/requirements.txt b/requirements.txt index a0c635f..6b4e170 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,3 +37,4 @@ ttkbootstrap==1.10.1 typing_extensions==4.12.2 urllib3==2.2.2 google-genai==1.5.0 +litellm>=1.80.0,<1.87.0 From 609d50e8408addc23ad88e67f3e63f3fe765cbef Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 2 Jun 2026 01:11:50 +0530 Subject: [PATCH 2/3] fix: filter default OpenAI base_url to prevent breaking litellm routing --- app/models/litellm_model.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/models/litellm_model.py b/app/models/litellm_model.py index 724b64a..50c7847 100644 --- a/app/models/litellm_model.py +++ b/app/models/litellm_model.py @@ -8,11 +8,16 @@ class LiteLLMModel(Model): + # Default base_url injected by llm.py when no custom URL is set. + _DEFAULT_OPENAI_BASE_URL = 'https://api.openai.com/v1/' + def __init__(self, model_name, base_url, api_key, context): self.model_name = model_name - self.base_url = base_url self.api_key = api_key self.context = context + # Only store base_url if the user explicitly set a custom endpoint. + # The default OpenAI URL would break LiteLLM's own provider routing. + self.base_url = base_url if base_url != self._DEFAULT_OPENAI_BASE_URL else None def get_instructions_for_objective(self, original_user_request: str, step_num: int = 0) -> dict[str, Any]: message = self.format_user_request_for_llm(original_user_request, step_num) From 67ea4b76e014a6ff56c0023463daeaa79c44ca6e Mon Sep 17 00:00:00 2001 From: RheagalFire Date: Tue, 2 Jun 2026 01:14:52 +0530 Subject: [PATCH 3/3] fix: remove litellm radio buttons, use custom model field instead --- app/ui.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/app/ui.py b/app/ui.py index 6aea837..375fed8 100644 --- a/app/ui.py +++ b/app/ui.py @@ -69,13 +69,6 @@ def create_widgets(self) -> None: ('Gemini gemini-3-flash-preview', 'gemini-3-flash-preview'), ] - litellm_models = [ - ('LiteLLM: anthropic/claude-sonnet-4-20250514', 'anthropic/claude-sonnet-4-20250514'), - ('LiteLLM: anthropic/claude-haiku-4-5-20251001', 'anthropic/claude-haiku-4-5-20251001'), - ('LiteLLM: bedrock/anthropic.claude-sonnet-4-20250514-v1:0', 'bedrock/anthropic.claude-sonnet-4-20250514-v1:0'), - ('LiteLLM: vertex_ai/gemini-2.5-flash', 'vertex_ai/gemini-2.5-flash'), - ] - deprecated_models = [ ('GPT-4o (Medium-Accurate, Medium-Fast)', 'gpt-4o'), ('GPT-4o-mini (Cheapest, Fastest)', 'gpt-4o-mini'), @@ -100,12 +93,6 @@ def create_widgets(self) -> None: ttk.Radiobutton(radio_frame, text=text, value=value, variable=self.model_var, bootstyle="info").pack( anchor=ttk.W, pady=5) - ttk.Separator(radio_frame, orient='horizontal').pack(fill='x', pady=8) - - for text, value in litellm_models: - ttk.Radiobutton(radio_frame, text=text, value=value, variable=self.model_var, bootstyle="info").pack( - anchor=ttk.W, pady=5) - ttk.Separator(radio_frame, orient='horizontal').pack(fill='x', pady=10) self.deprecated_expanded = False