Skip to content

Commit 39f595f

Browse files
Merge pull request #132 from askui/refactor/extract-system-prompts-for-reuse-in-chat
refactor: extract system prompts for reuse in chat
2 parents 3d466f1 + 50faf55 commit 39f595f

9 files changed

Lines changed: 173 additions & 161 deletions

File tree

pyproject.toml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -180,26 +180,27 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
180180
[tool.ruff.lint.per-file-ignores]
181181
"src/askui/agent.py" = ["E501"]
182182
"src/askui/android_agent.py" = ["E501"]
183-
"src/askui/web_agent.py" = ["E501"]
184-
"src/askui/models/shared/android_agent.py" = ["E501"]
185183
"src/askui/chat/*" = ["E501", "F401", "F403"]
186-
"src/askui/tools/askui/askui_ui_controller_grpc/*" = ["ALL"]
187184
"src/askui/locators/locators.py" = ["E501"]
188185
"src/askui/locators/relatable.py" = ["E501", "SLF001"]
189186
"src/askui/locators/serializers.py" = ["E501", "SLF001"]
190187
"src/askui/models/anthropic/computer_agent.py" = ["E501"]
191188
"src/askui/models/askui/ai_element_utils.py" = ["E501"]
192189
"src/askui/models/huggingface/spaces_api.py" = ["E501"]
190+
"src/askui/models/shared/android_agent.py" = ["E501"]
193191
"src/askui/models/ui_tars_ep/ui_tars_api.py" = ["E501"]
192+
"src/askui/prompts/system.py" = ["E501"]
194193
"src/askui/reporting.py" = ["E501"]
195194
"src/askui/telemetry/telemetry.py" = ["E501"]
195+
"src/askui/tools/askui/askui_ui_controller_grpc/*" = ["ALL"]
196196
"src/askui/utils/image_utils.py" = ["E501"]
197+
"src/askui/web_agent.py" = ["E501"]
197198
"tests/*" = ["S101", "PLR2004", "SLF001"]
198199
"tests/e2e/agent/test_get.py" = ["E501"]
199200
"tests/e2e/agent/test_locate_with_relations.py" = ["E501"]
200-
"tests/unit/locators/test_locators.py" = ["E501"]
201201
"tests/unit/locators/serializers/test_askui_locator_serializer.py" = ["E501"]
202202
"tests/unit/locators/serializers/test_locator_string_representation.py" = ["E501"]
203+
"tests/unit/locators/test_locators.py" = ["E501"]
203204
"tests/unit/utils/test_image_utils.py" = ["E501"]
204205

205206
[tool.ruff.lint.flake8-quotes]

src/askui/agent.py

Lines changed: 3 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,4 @@
11
import logging
2-
import platform
3-
import sys
4-
from datetime import datetime, timezone
52
from typing import Annotated, Literal, Optional
63

74
from pydantic import ConfigDict, Field, validate_call
@@ -17,6 +14,7 @@
1714
MessageSettings,
1815
)
1916
from askui.models.shared.tools import Tool
17+
from askui.prompts.system import COMPUTER_AGENT_SYSTEM_PROMPT
2018
from askui.tools.computer import Computer20241022Tool, Computer20250124Tool
2119
from askui.tools.exception_tool import ExceptionTool
2220
from askui.tools.list_displays_tool import ListDisplaysTool
@@ -31,33 +29,18 @@
3129
from .tools import AgentToolbox, ModifierKey, PcKey
3230
from .tools.askui import AskUiControllerClient
3331

34-
_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
35-
* You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access.
36-
* When you cannot find something (application window, ui element etc.) on the currently selected/active displa/screen, check the other available displays by listing them and checking which one is currently active and then going through the other displays one by one until you find it or you have checked all of them.
37-
* When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems.
38-
* When viewing a page it can be helpful to zoom out/in so that you can see everything on the page. Either that, or make sure you scroll down/up to see everything before deciding something isn't available.
39-
* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
40-
* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}.
41-
</SYSTEM_CAPABILITY>
42-
43-
<IMPORTANT>
44-
* When using Firefox, if a startup wizard appears, IGNORE IT. Do not even click "skip this step". Instead, click on the address bar where it says "Search or enter address", and enter the appropriate search term or URL there.
45-
* If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext to convert it to a text file, and then read that text file directly with your StrReplaceEditTool.
46-
</IMPORTANT>""" # noqa: DTZ002, E501
47-
48-
4932
_ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings(
5033
messages=MessageSettings(
5134
model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022,
52-
system=_SYSTEM_PROMPT,
35+
system=COMPUTER_AGENT_SYSTEM_PROMPT,
5336
betas=[COMPUTER_USE_20241022_BETA_FLAG],
5437
),
5538
)
5639

5740
_CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings(
5841
messages=MessageSettings(
5942
model=ModelName.CLAUDE__SONNET__4__20250514,
60-
system=_SYSTEM_PROMPT,
43+
system=COMPUTER_AGENT_SYSTEM_PROMPT,
6144
betas=[COMPUTER_USE_20250124_BETA_FLAG],
6245
thinking={"type": "enabled", "budget_tokens": 2048},
6346
),

src/askui/android_agent.py

Lines changed: 3 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from askui.container import telemetry
99
from askui.locators.locators import Locator
1010
from askui.models.shared.settings import ActSettings, MessageSettings
11+
from askui.prompts.system import ANDROID_AGENT_SYSTEM_PROMPT
1112
from askui.tools.android.agent_os import ANDROID_KEY
1213
from askui.tools.android.agent_os_facade import AndroidAgentOsFacade
1314
from askui.tools.android.ppadb_agent_os import PpadbAgentOs
@@ -29,90 +30,18 @@
2930
from .reporting import CompositeReporter, Reporter
3031
from .retry import Retry
3132

32-
_SYSTEM_PROMPT = """
33-
You are an autonomous Android device control agent operating via ADB on a test device with full system access.
34-
Your primary goal is to execute tasks efficiently and reliably while maintaining system stability.
35-
36-
<CORE PRINCIPLES>
37-
* Autonomy: Operate independently and make informed decisions without requiring user input.
38-
* Never ask for other tasks to be done, only do the task you are given.
39-
* Reliability: Ensure actions are repeatable and maintain system stability.
40-
* Efficiency: Optimize operations to minimize latency and resource usage.
41-
* Safety: Always verify actions before execution, even with full system access.
42-
</CORE PRINCIPLES>
43-
44-
<OPERATIONAL GUIDELINES>
45-
1. Tool Usage:
46-
* Verify tool availability before starting any operation
47-
* Use the most direct and efficient tool for each task
48-
* Combine tools strategically for complex operations
49-
* Prefer built-in tools over shell commands when possible
50-
51-
2. Error Handling:
52-
* Assess failures systematically: check tool availability, permissions, and device state
53-
* Implement retry logic with exponential backoff for transient failures
54-
* Use fallback strategies when primary approaches fail
55-
* Provide clear, actionable error messages with diagnostic information
56-
57-
3. Performance Optimization:
58-
* Use one-liner shell commands with inline filtering (grep, cut, awk, jq) for efficiency
59-
* Minimize screen captures and coordinate calculations
60-
* Cache device state information when appropriate
61-
* Batch related operations when possible
62-
63-
4. Screen Interaction:
64-
* Ensure all coordinates are integers and within screen bounds
65-
* Implement smart scrolling for off-screen elements
66-
* Use appropriate gestures (tap, swipe, drag) based on context
67-
* Verify element visibility before interaction
68-
69-
5. System Access:
70-
* Leverage full system access responsibly
71-
* Use shell commands for system-level operations
72-
* Monitor system state and resource usage
73-
* Maintain system stability during operations
74-
75-
6. Recovery Strategies:
76-
* If an element is not visible, try:
77-
- Scrolling in different directions
78-
- Adjusting view parameters
79-
- Using alternative interaction methods
80-
* If a tool fails:
81-
- Check device connection and state
82-
- Verify tool availability and permissions
83-
- Try alternative tools or approaches
84-
* If stuck:
85-
- Provide clear diagnostic information
86-
- Suggest potential solutions
87-
- Request user intervention only if necessary
88-
89-
7. Best Practices:
90-
* Document all significant operations
91-
* Maintain operation logs for debugging
92-
* Implement proper cleanup after operations
93-
* Follow Android best practices for UI interaction
94-
95-
<IMPORTANT NOTES>
96-
* This is a test device with full system access - use this capability responsibly
97-
* Always verify the success of critical operations
98-
* Maintain system stability as the highest priority
99-
* Provide clear, actionable feedback for all operations
100-
* Use the most efficient method for each task
101-
</IMPORTANT NOTES>
102-
"""
103-
10433
_ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings(
10534
messages=MessageSettings(
10635
model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022,
107-
system=_SYSTEM_PROMPT,
36+
system=ANDROID_AGENT_SYSTEM_PROMPT,
10837
betas=[],
10938
),
11039
)
11140

11241
_CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings(
11342
messages=MessageSettings(
11443
model=ModelName.CLAUDE__SONNET__4__20250514,
115-
system=_SYSTEM_PROMPT,
44+
system=ANDROID_AGENT_SYSTEM_PROMPT,
11645
thinking={"type": "enabled", "budget_tokens": 2048},
11746
betas=[],
11847
),

0 commit comments

Comments
 (0)