|
18 | 18 | from askui.models.shared.tools import Tool, ToolCollection |
19 | 19 | from askui.tools.agent_os import AgentOs |
20 | 20 | from askui.tools.android.agent_os import AndroidAgentOs |
| 21 | +from askui.utils.annotation_writer import AnnotationWriter |
21 | 22 | from askui.utils.image_utils import ImageSource |
22 | 23 | from askui.utils.source_utils import InputSource, load_image_source |
23 | 24 |
|
24 | 25 | from .models import ModelComposition |
25 | 26 | from .models.exceptions import ElementNotFoundError, WaitUntilError |
26 | 27 | from .models.model_router import ModelRouter, initialize_default_model_registry |
27 | 28 | from .models.models import ( |
| 29 | + DetectedElement, |
28 | 30 | ModelChoice, |
29 | 31 | ModelName, |
30 | 32 | ModelRegistry, |
@@ -507,6 +509,101 @@ def locate_all( |
507 | 509 | ) |
508 | 510 | return self._locate(locator=locator, screenshot=screenshot, model=model) |
509 | 511 |
|
@telemetry.record_call(exclude={"locator", "screenshot"})
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def locate_all_elements(
    self,
    screenshot: Optional[InputSource] = None,
    model_composition: ModelComposition | None = None,
) -> list[DetectedElement]:
    """Detect every element on the screen (or in a given image).

    Args:
        screenshot (InputSource | None, optional): Image to search in. Can be
            a path to an image file, a PIL Image object or a data URL. When
            `None`, a screenshot is requested from the agent OS instead.
        model_composition (ModelComposition | None, optional): The model
            composition used for detection. When it is not given (or is
            falsy), `ModelName.ASKUI` is used.

    Returns:
        list[DetectedElement]: Whatever the model router reports for the image.

    Example:
        ```python
        from askui import VisionAgent

        with VisionAgent() as agent:
            detected_elements = agent.locate_all_elements()
            print(f"Found {len(detected_elements)} elements: {detected_elements}")
        ```
    """
    # Only ask the agent OS for a capture when the caller supplied no image.
    if screenshot is None:
        source = self._agent_os.screenshot()
    else:
        source = screenshot
    image = load_image_source(source)

    # `or` (not an `is None` test) so any falsy composition also falls back.
    chosen_model = model_composition or ModelName.ASKUI
    return self._model_router.locate_all_elements(image=image, model=chosen_model)
| 547 | + |
@telemetry.record_call(exclude={"screenshot", "output_directory"})
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def annotate(
    self,
    screenshot: InputSource | None = None,
    model_composition: ModelComposition | None = None,
    output_directory: str = "reports",
) -> None:
    """Detect all elements in a screenshot and write an annotated report.

    An interactive HTML file showing the detected elements is created and
    saved to the output directory. Opened in a browser, it shows the
    annotated image: hovering over an element reveals its name and text
    value, and clicking its box copies the text value to the clipboard.

    Args:
        screenshot (InputSource | None, optional): The screenshot to annotate.
            If `None`, takes a screenshot of the currently selected display.
        model_composition (ModelComposition | None, optional): The composition
            or name of the model(s) to be used for locating the elements.
        output_directory (str, optional): The directory to save the annotated
            image. Defaults to "reports".

    Example Using VisionAgent:
        ```python
        from askui import VisionAgent

        with VisionAgent() as agent:
            agent.annotate()
        ```

    Example Using AndroidVisionAgent:
        ```python
        from askui import AndroidVisionAgent

        with AndroidVisionAgent() as agent:
            agent.annotate()
        ```

    Example Using VisionAgent with custom screenshot and output directory:
        ```python
        from askui import VisionAgent

        with VisionAgent() as agent:
            agent.annotate(screenshot="screenshot.png", output_directory="htmls")
        ```
    """
    # Resolve the image once so detection and the annotation writer both
    # receive the very same picture.
    image = self._agent_os.screenshot() if screenshot is None else screenshot

    elements = self.locate_all_elements(
        screenshot=image,
        model_composition=model_composition,
    )
    writer = AnnotationWriter(image=image, elements=elements)
    writer.write_to_file(output_directory)
| 606 | + |
510 | 607 | @telemetry.record_call(exclude={"until"}) |
511 | 608 | @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) |
512 | 609 | def wait( |
|
0 commit comments