Spaces:
Runtime error
Runtime error
File size: 4,826 Bytes
d32faf0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# Python stubs (signature + docstring only) describing the desktop/browser
# actions. The text is interpolated as-is into the system prompts below, so it
# is kept syntactically valid Python: what the model reads is what it may call.
# NOTE(review): the `scroll` stub previously documented `x`/`y` arguments that
# its signature does not declare; the docstring now follows the signature.
# Confirm against the executor whether `scroll` should accept coordinates.
OS_ACTIONS = """
def final_answer(answer: any) -> any:
    \"\"\"
    Provides a final answer to the given problem.
    Args:
        answer: The final answer to the problem
    \"\"\"

def move_mouse(x: float, y: float) -> str:
    \"\"\"
    Moves the mouse cursor to the specified coordinates
    Args:
        x: The x coordinate (horizontal position)
        y: The y coordinate (vertical position)
    \"\"\"

def click(x: Optional[float] = None, y: Optional[float] = None) -> str:
    \"\"\"
    Performs a left-click at the specified normalized coordinates
    Args:
        x: The x coordinate (horizontal position)
        y: The y coordinate (vertical position)
    \"\"\"

def double_click(x: Optional[float] = None, y: Optional[float] = None) -> str:
    \"\"\"
    Performs a double-click at the specified normalized coordinates
    Args:
        x: The x coordinate (horizontal position)
        y: The y coordinate (vertical position)
    \"\"\"

def type(text: str) -> str:
    \"\"\"
    Types the specified text at the current cursor position.
    Args:
        text: The text to type
    \"\"\"

def press(keys: str | list[str]) -> str:
    \"\"\"
    Presses a keyboard key
    Args:
        keys: The key or list of keys to press (e.g. "enter", "space", "backspace", "ctrl", etc.).
    \"\"\"

def navigate_back() -> str:
    \"\"\"
    Goes back to the previous page in the browser. If using this tool doesn't work, just click the button directly.
    \"\"\"

def drag(from_coord: list[float], to_coord: list[float]) -> str:
    \"\"\"
    Clicks [x1, y1], drags mouse to [x2, y2], then release click.
    Args:
        from_coord: origin coordinates [x1, y1]
        to_coord: end coordinates [x2, y2]
    \"\"\"

def scroll(direction: Literal["up", "down"] = "down", amount: int = 1) -> str:
    \"\"\"
    Uses the scroll button: this could scroll the page or zoom, depending on the app. DO NOT use scroll to move through linux desktop menus.
    Args:
        direction: The direction to scroll ("up" or "down"), defaults to "down". For zoom, "up" zooms in, "down" zooms out.
        amount: The amount to scroll. A good amount is 1 or 2.
    \"\"\"

def wait(seconds: float) -> str:
    \"\"\"
    Waits for the specified number of seconds. Very useful in case the prior order is still executing (for example starting very heavy applications like browsers or office apps)
    Args:
        seconds: Number of seconds to wait, generally 2 is enough.
    \"\"\"
"""
# Python stubs (signature + docstring only) describing the mobile-only actions.
# The text is interpolated as-is into MOBILE_SYSTEM_PROMPT, after OS_ACTIONS,
# so it is kept syntactically valid Python.
# NOTE(review): `navigate_back` is also declared in OS_ACTIONS with a different
# description ("previous page" there, "home page" here) and the mobile prompt
# includes both lists; confirm which behavior the mobile executor implements.
# NOTE(review): `swipe` annotates its coordinates as list[str] while `drag` in
# OS_ACTIONS uses list[float]; confirm the intended element type.
MOBILE_ACTIONS = """
def navigate_back() -> str:
    \"\"\"
    Return to home page
    \"\"\"

def open_app(app_name: str) -> str:
    \"\"\"
    Launches the specified application.
    Args:
        app_name: the name of the application to launch
    \"\"\"

def swipe(from_coord: list[str], to_coord: list[str]) -> str:
    \"\"\"
    swipe from 'from_coord' to 'to_coord'
    Args:
        from_coord: origin coordinates
        to_coord: end coordinates
    \"\"\"

def long_press(x: int, y: int) -> str:
    \"\"\"
    Performs a long-press at the specified coordinates
    Args:
        x: The x coordinate (horizontal position)
        y: The y coordinate (vertical position)
    \"\"\"
"""
# System prompt for desktop/browser sessions: states the think-then-code
# protocol and embeds the OS_ACTIONS stubs between <code> tags.
# The apostrophe in "You’ll" and the "•" bullets are intentional non-ASCII
# characters; the file must be read and written as UTF-8 to keep them intact.
OS_SYSTEM_PROMPT = f"""You are a helpful GUI agent. You’ll be given a task and a screenshot of the screen. Complete the task using Python function calls.
For each step:
• First, <think></think> to express the thought process guiding your next action and the reasoning behind it.
• Then, use <code></code> to perform the action. it will be executed in a stateful environment.
The following functions are exposed to the Python interpreter:
<code>
{OS_ACTIONS}
</code>
The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
"""
# System prompt for mobile sessions: same think-then-code protocol as the
# desktop prompt, embedding the OS_ACTIONS stubs followed by the MOBILE_ACTIONS
# stubs between <code> tags (the "# OS ACTIONS" / "# MOBILE ACTIONS" lines are
# part of the prompt text, not Python comments of this module).
# The apostrophe in "You’ll" and the "•" bullets are intentional non-ASCII
# characters; the file must be read and written as UTF-8 to keep them intact.
MOBILE_SYSTEM_PROMPT = f"""You are a helpful GUI agent. You’ll be given a task and a screenshot of the screen. Complete the task using Python function calls.
For each step:
• First, <think></think> to express the thought process guiding your next action and the reasoning behind it.
• Then, use <code></code> to perform the action. it will be executed in a stateful environment.
The following functions are exposed to the Python interpreter:
<code>
# OS ACTIONS
{OS_ACTIONS}
# MOBILE ACTIONS
{MOBILE_ACTIONS}
</code>
The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
"""