Coverage for tinytroupe / agent / browser_faculty.py: 0%
53 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-28 17:48 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-28 17:48 +0000
1from tinytroupe.agent.mental_faculty import TinyMentalFaculty
2from tinytroupe.tools import browser
3import textwrap
class BrowserFaculty(TinyMentalFaculty):
    """
    Mental faculty that lets an agent drive a web browser.

    Every supported action is forwarded to the `browser` tool module and
    its outcome is reported back to the agent through `agent.see(...)`.
    """

    def __init__(self):
        super().__init__("Browser Navigation")

    def process_action(self, agent, action: dict) -> bool:
        """
        Carry out a single browser action on behalf of `agent`.

        The action's "type" entry selects the browser operation, while its
        "target" and "content" entries supply the arguments (missing entries
        are read as None).

        Returns True when the type is one of the browser actions handled
        here, and False otherwise, in which case nothing is done.
        """
        kind = action.get("type")
        payload = action.get("content")
        selector = action.get("target")

        # Each branch performs the browser operation first and only then
        # tells the agent what happened.
        if kind == "See":
            saved_to = browser.screenshot()
            note = f"Took a screenshot and saved it to {saved_to}. I will now analyze the screenshot."
            agent.see(note)
            return True

        if kind == "Click":
            browser.click(selector)
            note = f"Clicked on element with selector: {selector}"
            agent.see(note)
            return True

        if kind == "Write":
            browser.fill(selector, payload)
            note = f"Typed '{payload}' into element with selector: {selector}"
            agent.see(note)
            return True

        if kind == "Submit":
            browser.submit_form(selector)
            note = f"Submitted form with element: {selector}"
            agent.see(note)
            return True

        if kind == "Wait":
            browser.wait_for_element(selector)
            note = f"Waited for element: {selector}"
            agent.see(note)
            return True

        if kind == "Scroll":
            browser.scroll_page(payload)
            note = f"Scrolled page {payload}"
            agent.see(note)
            return True

        if kind == "Hover":
            browser.hover_element(selector)
            note = f"Hovered over element: {selector}"
            agent.see(note)
            return True

        if kind == "Keyboard_Key":
            browser.press_key(payload)
            note = f"Pressed key: {payload}"
            agent.see(note)
            return True

        if kind == "ScanPage":
            details = browser.get_page_info()
            note = f"Scanned page and found the following information: {details}"
            agent.see(note)
            return True

        # Not a browser action.
        return False

    def actions_definitions_prompt(self) -> str:
        """
        Build the prompt text that lists the available browser actions.
        """
        return textwrap.dedent("""
        - See: Take a screenshot of the current page. The `content` will be a placeholder for vision.
        - Click: Click on an element on the page. The `target` should be a CSS selector for the element.
        - Write: Type text into an element on the page. The `target` should be a CSS selector for the element, and `content` should be the text to type.
        - Submit: Submit a form on the page. The `target` should be a CSS selector for a form or an element within a form.
        - Wait: Wait for an element to appear on the page. The `target` should be a CSS selector for the element.
        - Scroll: Scroll the page. The `content` should be 'up' or 'down'.
        - Hover: Hover over an element on the page. The `target` should be a CSS selector for the element.
        - Keyboard_Key: Press a key on the keyboard. The `content` should be the key to press (e.g., 'Enter', 'ArrowDown').
        - ScanPage: Get information about the current page, such as links and form elements.
        """)

    def actions_constraints_prompt(self) -> str:
        """
        Build the prompt text that gives guidance on using the browser actions.
        """
        return textwrap.dedent("""
        - Use See to get a visual representation of the page to help you decide on the next action.
        - Use ScanPage to get a list of interactive elements to help you decide on the next action.
        - Use Click, Write, and other actions to interact with elements on the page to accomplish the task.
        """)