Coverage for tinytroupe / agent / browser_faculty.py: 0%

53 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-28 17:48 +0000

1from tinytroupe.agent.mental_faculty import TinyMentalFaculty 

2from tinytroupe.tools import browser 

3import textwrap 

4 

class BrowserFaculty(TinyMentalFaculty):
    """
    Mental faculty that lets an agent drive a web browser.

    Each supported action is forwarded to the ``browser`` tool module and the
    outcome is reported back to the agent through ``agent.see``.
    """

    def __init__(self):
        super().__init__("Browser Navigation")

    def _run_browser_action(self, kind, target, content):
        """
        Execute the browser operation named by ``kind``.

        Returns the observation text describing what was done, or ``None``
        when ``kind`` is not one of the browser actions handled here. The
        browser call always happens before the observation text is built.
        """
        if kind == "See":
            saved_to = browser.screenshot()
            return f"Took a screenshot and saved it to {saved_to}. I will now analyze the screenshot."

        if kind == "Click":
            browser.click(target)
            return f"Clicked on element with selector: {target}"

        if kind == "Write":
            browser.fill(target, content)
            return f"Typed '{content}' into element with selector: {target}"

        if kind == "Submit":
            browser.submit_form(target)
            return f"Submitted form with element: {target}"

        if kind == "Wait":
            browser.wait_for_element(target)
            return f"Waited for element: {target}"

        if kind == "Scroll":
            browser.scroll_page(content)
            return f"Scrolled page {content}"

        if kind == "Hover":
            browser.hover_element(target)
            return f"Hovered over element: {target}"

        if kind == "Keyboard_Key":
            browser.press_key(content)
            return f"Pressed key: {content}"

        if kind == "ScanPage":
            details = browser.get_page_info()
            return f"Scanned page and found the following information: {details}"

        # Not a browser action.
        return None

    def process_action(self, agent, action: dict) -> bool:
        """
        Handle a single browser-related action on behalf of ``agent``.

        The action's ``type``, ``content`` and ``target`` entries are read
        with ``dict.get``, so missing keys are seen as ``None``.

        Returns ``True`` when the action type was recognized and carried out
        (the agent is then shown a description of the result), ``False``
        otherwise.
        """
        kind = action.get("type")
        text = action.get("content")
        selector = action.get("target")

        observation = self._run_browser_action(kind, selector, text)
        if observation is None:
            return False

        agent.see(observation)
        return True

    def actions_definitions_prompt(self) -> str:
        """
        Build the prompt fragment listing the browser actions and their fields.
        """
        definitions = """
            - See: Take a screenshot of the current page. The `content` will be a placeholder for vision.
            - Click: Click on an element on the page. The `target` should be a CSS selector for the element.
            - Write: Type text into an element on the page. The `target` should be a CSS selector for the element, and `content` should be the text to type.
            - Submit: Submit a form on the page. The `target` should be a CSS selector for a form or an element within a form.
            - Wait: Wait for an element to appear on the page. The `target` should be a CSS selector for the element.
            - Scroll: Scroll the page. The `content` should be 'up' or 'down'.
            - Hover: Hover over an element on the page. The `target` should be a CSS selector for the element.
            - Keyboard_Key: Press a key on the keyboard. The `content` should be the key to press (e.g., 'Enter', 'ArrowDown').
            - ScanPage: Get information about the current page, such as links and form elements.
            """
        return textwrap.dedent(definitions)

    def actions_constraints_prompt(self) -> str:
        """
        Build the prompt fragment with usage guidance for the browser actions.
        """
        constraints = """
            - Use See to get a visual representation of the page to help you decide on the next action.
            - Use ScanPage to get a list of interactive elements to help you decide on the next action.
            - Use Click, Write, and other actions to interact with elements on the page to accomplish the task.
            """
        return textwrap.dedent(constraints)